{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 122,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "v_pOHCrfQEGi",
        "outputId": "889d97f4-8e9f-444d-b9cc-8ba29d9eeede"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: pymongo in /Users/nithi/anaconda3/lib/python3.11/site-packages (4.9.1)\n",
            "Requirement already satisfied: dnspython<3.0.0,>=1.16.0 in /Users/nithi/anaconda3/lib/python3.11/site-packages (from pymongo) (2.6.1)\n"
          ]
        }
      ],
      "source": [
        "!pip install pymongo\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "X3PV8gtHVpsY"
      },
      "source": []
    },
    {
      "cell_type": "code",
      "execution_count": 123,
      "metadata": {
        "id": "cdV_Bg9TRaq3"
      },
      "outputs": [],
      "source": [
        "from collections import defaultdict\n",
        "import json, math\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import plotly.express as px\n",
        "from tqdm import tqdm\n",
        "import requests\n",
        "pd.options.display.float_format = '{:.2f}'.format\n",
        "import pymongo"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 124,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 424
        },
        "id": "VjmbCCnXSjXS",
        "outputId": "8585afd6-9860-446f-dce6-385c2adcbc14"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Prompt</th>\n",
              "      <th>Agent_A</th>\n",
              "      <th>Agent_B</th>\n",
              "      <th>Rating</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>The prompts are:\\n\\n1. \"What is the best food ...</td>\n",
              "      <td>{'Agent name': 'langchain brave-search agent (...</td>\n",
              "      <td>{'Agent name': 'langchain google-serper search...</td>\n",
              "      <td>A is better</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>The prompt is \"\" (an empty string).</td>\n",
              "      <td>{'Agent name': 'langchain brave-search agent (...</td>\n",
              "      <td>{'Agent name': 'langchain google-serper search...</td>\n",
              "      <td>A is better</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>The prompt in the executed code is \"\".</td>\n",
              "      <td>{'Agent name': 'langchain brave-search agent (...</td>\n",
              "      <td>{'Agent name': 'langchain google-serper search...</td>\n",
              "      <td>A is better</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>The prompt in the given executed code is \"\" (a...</td>\n",
              "      <td>{'Agent name': 'langchain brave-search agent (...</td>\n",
              "      <td>{'Agent name': 'langchain google-serper search...</td>\n",
              "      <td>B is better</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Write sqlite query to get top 10 rows from the...</td>\n",
              "      <td>{'Agent name': 'sql agent plotter langchain (g...</td>\n",
              "      <td>{'Agent name': 'langchain ArXiv Article Fetche...</td>\n",
              "      <td>B is better</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2098</th>\n",
              "      <td>\"what was AAPL stock yesterday\"</td>\n",
              "      <td>{'Agent name': 'langchain alpha-vantage stock ...</td>\n",
              "      <td>{'Agent name': 'langchain alpha-vantage stock ...</td>\n",
              "      <td>A is better</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2099</th>\n",
              "      <td>\"I want to know what the big thing in ML is re...</td>\n",
              "      <td>{'Agent name': 'langchain ArXiv Article Fetche...</td>\n",
              "      <td>{'Agent name': 'llamaindex ArXiv Article Fetch...</td>\n",
              "      <td>A is better</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2100</th>\n",
              "      <td>\"I want to generate artwork of a cat in the si...</td>\n",
              "      <td>{'Agent name': 'langchain Dall-E Image Generat...</td>\n",
              "      <td>{'Agent name': 'langchain google-serper search...</td>\n",
              "      <td>Both are bad</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2101</th>\n",
              "      <td>\"I want to generate artwork of a cat in the si...</td>\n",
              "      <td>{'Agent name': 'langchain Wikipedia (gpt-4-tur...</td>\n",
              "      <td>{'Agent name': 'langchain Dall-E Image Generat...</td>\n",
              "      <td>Both are bad</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2102</th>\n",
              "      <td>\"how do the rates of motor vehicle accidents p...</td>\n",
              "      <td>{'Agent name': 'langchain google-serper search...</td>\n",
              "      <td>{'Agent name': 'langchain Wolfram Alpha (claud...</td>\n",
              "      <td>A is better</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>2103 rows × 4 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "                                                 Prompt  \\\n",
              "0     The prompts are:\\n\\n1. \"What is the best food ...   \n",
              "1                   The prompt is \"\" (an empty string).   \n",
              "2                The prompt in the executed code is \"\".   \n",
              "3     The prompt in the given executed code is \"\" (a...   \n",
              "4     Write sqlite query to get top 10 rows from the...   \n",
              "...                                                 ...   \n",
              "2098                    \"what was AAPL stock yesterday\"   \n",
              "2099  \"I want to know what the big thing in ML is re...   \n",
              "2100  \"I want to generate artwork of a cat in the si...   \n",
              "2101  \"I want to generate artwork of a cat in the si...   \n",
              "2102  \"how do the rates of motor vehicle accidents p...   \n",
              "\n",
              "                                                Agent_A  \\\n",
              "0     {'Agent name': 'langchain brave-search agent (...   \n",
              "1     {'Agent name': 'langchain brave-search agent (...   \n",
              "2     {'Agent name': 'langchain brave-search agent (...   \n",
              "3     {'Agent name': 'langchain brave-search agent (...   \n",
              "4     {'Agent name': 'sql agent plotter langchain (g...   \n",
              "...                                                 ...   \n",
              "2098  {'Agent name': 'langchain alpha-vantage stock ...   \n",
              "2099  {'Agent name': 'langchain ArXiv Article Fetche...   \n",
              "2100  {'Agent name': 'langchain Dall-E Image Generat...   \n",
              "2101  {'Agent name': 'langchain Wikipedia (gpt-4-tur...   \n",
              "2102  {'Agent name': 'langchain google-serper search...   \n",
              "\n",
              "                                                Agent_B        Rating  \n",
              "0     {'Agent name': 'langchain google-serper search...   A is better  \n",
              "1     {'Agent name': 'langchain google-serper search...   A is better  \n",
              "2     {'Agent name': 'langchain google-serper search...   A is better  \n",
              "3     {'Agent name': 'langchain google-serper search...   B is better  \n",
              "4     {'Agent name': 'langchain ArXiv Article Fetche...   B is better  \n",
              "...                                                 ...           ...  \n",
              "2098  {'Agent name': 'langchain alpha-vantage stock ...   A is better  \n",
              "2099  {'Agent name': 'llamaindex ArXiv Article Fetch...   A is better  \n",
              "2100  {'Agent name': 'langchain google-serper search...  Both are bad  \n",
              "2101  {'Agent name': 'langchain Dall-E Image Generat...  Both are bad  \n",
              "2102  {'Agent name': 'langchain Wolfram Alpha (claud...   A is better  \n",
              "\n",
              "[2103 rows x 4 columns]"
            ]
          },
          "execution_count": 124,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "import pandas as pd\n",
        "\n",
        "# Load the ratings data from the JSON files\n",
        "ratings = pd.read_json('agent_ratings_V0.json')\n",
        "toolratings = pd.read_json('toolratings_V0.json')\n",
        "frameworkratings = pd.read_json('frameworkratings_V0.json')\n",
        "modelratings = pd.read_json('modelratings_V0.json')\n",
        "\n",
        "# Display the first few rows to verify the data is loaded correctly\n",
        "ratings\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 125,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 417
        },
        "id": "2IVBFCVDSyBT",
        "outputId": "a3e9fedf-fb4f-424e-f000-3cf6471d7f3f"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.plotly.v1+json": {
              "config": {
                "plotlyServerURL": "https://plot.ly"
              },
              "data": [
                {
                  "alignmentgroup": "True",
                  "hovertemplate": "variable=count<br>Rating=%{x}<br>value=%{y}<extra></extra>",
                  "legendgroup": "count",
                  "marker": {
                    "color": "#636efa",
                    "pattern": {
                      "shape": ""
                    }
                  },
                  "name": "count",
                  "offsetgroup": "count",
                  "orientation": "v",
                  "showlegend": true,
                  "textposition": "auto",
                  "texttemplate": "%{y}",
                  "type": "bar",
                  "x": [
                    "A is better",
                    "B is better",
                    "Tie",
                    "Both are bad"
                  ],
                  "xaxis": "x",
                  "y": [
                    875,
                    752,
                    258,
                    218
                  ],
                  "yaxis": "y"
                }
              ],
              "layout": {
                "barmode": "relative",
                "height": 400,
                "legend": {
                  "title": {
                    "text": "variable"
                  },
                  "tracegroupgap": 0
                },
                "showlegend": false,
                "template": {
                  "data": {
                    "bar": [
                      {
                        "error_x": {
                          "color": "#2a3f5f"
                        },
                        "error_y": {
                          "color": "#2a3f5f"
                        },
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "bar"
                      }
                    ],
                    "barpolar": [
                      {
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "barpolar"
                      }
                    ],
                    "carpet": [
                      {
                        "aaxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "baxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "type": "carpet"
                      }
                    ],
                    "choropleth": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "choropleth"
                      }
                    ],
                    "contour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "contour"
                      }
                    ],
                    "contourcarpet": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "contourcarpet"
                      }
                    ],
                    "heatmap": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmap"
                      }
                    ],
                    "heatmapgl": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmapgl"
                      }
                    ],
                    "histogram": [
                      {
                        "marker": {
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "histogram"
                      }
                    ],
                    "histogram2d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2d"
                      }
                    ],
                    "histogram2dcontour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2dcontour"
                      }
                    ],
                    "mesh3d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "mesh3d"
                      }
                    ],
                    "parcoords": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "parcoords"
                      }
                    ],
                    "pie": [
                      {
                        "automargin": true,
                        "type": "pie"
                      }
                    ],
                    "scatter": [
                      {
                        "fillpattern": {
                          "fillmode": "overlay",
                          "size": 10,
                          "solidity": 0.2
                        },
                        "type": "scatter"
                      }
                    ],
                    "scatter3d": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatter3d"
                      }
                    ],
                    "scattercarpet": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattercarpet"
                      }
                    ],
                    "scattergeo": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergeo"
                      }
                    ],
                    "scattergl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergl"
                      }
                    ],
                    "scattermapbox": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattermapbox"
                      }
                    ],
                    "scatterpolar": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolar"
                      }
                    ],
                    "scatterpolargl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolargl"
                      }
                    ],
                    "scatterternary": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterternary"
                      }
                    ],
                    "surface": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "surface"
                      }
                    ],
                    "table": [
                      {
                        "cells": {
                          "fill": {
                            "color": "#EBF0F8"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "header": {
                          "fill": {
                            "color": "#C8D4E3"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "type": "table"
                      }
                    ]
                  },
                  "layout": {
                    "annotationdefaults": {
                      "arrowcolor": "#2a3f5f",
                      "arrowhead": 0,
                      "arrowwidth": 1
                    },
                    "autotypenumbers": "strict",
                    "coloraxis": {
                      "colorbar": {
                        "outlinewidth": 0,
                        "ticks": ""
                      }
                    },
                    "colorscale": {
                      "diverging": [
                        [
                          0,
                          "#8e0152"
                        ],
                        [
                          0.1,
                          "#c51b7d"
                        ],
                        [
                          0.2,
                          "#de77ae"
                        ],
                        [
                          0.3,
                          "#f1b6da"
                        ],
                        [
                          0.4,
                          "#fde0ef"
                        ],
                        [
                          0.5,
                          "#f7f7f7"
                        ],
                        [
                          0.6,
                          "#e6f5d0"
                        ],
                        [
                          0.7,
                          "#b8e186"
                        ],
                        [
                          0.8,
                          "#7fbc41"
                        ],
                        [
                          0.9,
                          "#4d9221"
                        ],
                        [
                          1,
                          "#276419"
                        ]
                      ],
                      "sequential": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ],
                      "sequentialminus": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ]
                    },
                    "colorway": [
                      "#636efa",
                      "#EF553B",
                      "#00cc96",
                      "#ab63fa",
                      "#FFA15A",
                      "#19d3f3",
                      "#FF6692",
                      "#B6E880",
                      "#FF97FF",
                      "#FECB52"
                    ],
                    "font": {
                      "color": "#2a3f5f"
                    },
                    "geo": {
                      "bgcolor": "white",
                      "lakecolor": "white",
                      "landcolor": "#E5ECF6",
                      "showlakes": true,
                      "showland": true,
                      "subunitcolor": "white"
                    },
                    "hoverlabel": {
                      "align": "left"
                    },
                    "hovermode": "closest",
                    "mapbox": {
                      "style": "light"
                    },
                    "paper_bgcolor": "white",
                    "plot_bgcolor": "#E5ECF6",
                    "polar": {
                      "angularaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "radialaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "scene": {
                      "xaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "yaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "zaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      }
                    },
                    "shapedefaults": {
                      "line": {
                        "color": "#2a3f5f"
                      }
                    },
                    "ternary": {
                      "aaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "baxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "caxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "title": {
                      "x": 0.05
                    },
                    "xaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    },
                    "yaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    }
                  }
                },
                "title": {
                  "text": "Counts of Battle Outcomes"
                },
                "xaxis": {
                  "anchor": "y",
                  "domain": [
                    0,
                    1
                  ],
                  "title": {
                    "text": "Battle Outcome"
                  }
                },
                "yaxis": {
                  "anchor": "x",
                  "domain": [
                    0,
                    1
                  ],
                  "title": {
                    "text": "Count"
                  }
                }
              }
            }
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "fig = px.bar(ratings[\"Rating\"].value_counts(),\n",
        "             title=\"Counts of Battle Outcomes\", text_auto=True, height=400)\n",
        "fig.update_layout(xaxis_title=\"Battle Outcome\", yaxis_title=\"Count\",\n",
        "                  showlegend=False)\n",
        "fig"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 105,
      "metadata": {
        "id": "IpiWBwaFVZh0"
      },
      "outputs": [],
      "source": [
        "ratings_no_tie = ratings[~ratings[\"Rating\"].str.contains(\"Tie\")]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 126,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 417
        },
        "id": "02GoDlp7VkvP",
        "outputId": "bf5e1c55-9102-4afb-94b1-62c4ffb8775b"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.plotly.v1+json": {
              "config": {
                "plotlyServerURL": "https://plot.ly"
              },
              "data": [
                {
                  "alignmentgroup": "True",
                  "hovertemplate": "index=%{x}<br>y=%{y}<extra></extra>",
                  "legendgroup": "",
                  "marker": {
                    "color": "#636efa",
                    "pattern": {
                      "shape": ""
                    }
                  },
                  "name": "",
                  "offsetgroup": "",
                  "orientation": "v",
                  "showlegend": false,
                  "textposition": "auto",
                  "texttemplate": "%{y}",
                  "type": "bar",
                  "x": [
                    "langchain google-serper search agent (gpt-4o-2024-08-06)",
                    "langchain brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain Wikipedia (open-mixtral-8x7b)",
                    "langchain google-serper search agent (gpt-4-0613)",
                    "langchain Wikipedia (gpt-4-0613)",
                    "langchain Wikipedia (gpt-4o-mini-2024-07-18)",
                    "llamaindex brave-search agent (gpt-4o-2024-08-06)",
                    "langchain google-serper search agent (gpt-4-turbo-2024-04-09)",
                    "langchain google-serper search agent (gpt-4o-2024-05-13)",
                    "langchain Wikipedia (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (mistral-large-2407)",
                    "langchain Wikipedia (claude-3-opus-20240229)",
                    "langchain Wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain google-serper search agent (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (open-mixtral-8x7b)",
                    "langchain Wikipedia (llama-3.1-8B-instruct)",
                    "langchain google-serper search agent (claude-3-opus-20240229)",
                    "langchain google-serper search agent (open-mixtral-8x22b)",
                    "langchain google-serper search agent (gemini-1.5-pro-001)",
                    "langchain Wikipedia (open-mixtral-8x22b)",
                    "langchain google-serper search agent (gpt-4o-mini-2024-07-18)",
                    "langchain Wikipedia (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (gemini-1.5-flash-001)",
                    "langchain Wikipedia (gemini-1.5-pro-001)",
                    "langchain google-serper search agent (llama-3.1-8B-instruct)",
                    "langchain Wikipedia (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (claude-3-haiku-20240307)",
                    "langchain google-serper search agent (llama-3.1-405B-instruct)",
                    "langchain Wikipedia (llama-3.1-405B-instruct)",
                    "langchain Wikipedia (mistral-large-2407)",
                    "langchain Wikipedia (claude-3-haiku-20240307)",
                    "openai general assistant (gpt-4o-2024-08-06)",
                    "langchain alpha-vantage stock agent (gpt-4-turbo-2024-04-09)",
                    "openai assistant function calling (gpt-4o-2024-08-06)",
                    "anthropic calculator tool (claude-3-5-sonnet-20240620)",
                    "anthropic calculator tool (claude-3-opus-20240229)",
                    "anthropic calculator tool (claude-3-haiku-20240307)",
                    "langchain alpha-vantage stock agent (gemini-1.5-flash-001)",
                    "langchain ArXiv Article Fetcher (gpt-4-0613)",
                    "langchain YouTube Search (gpt-4o-2024-08-06)",
                    "langchain ArXiv Article Fetcher (gpt-4o-2024-08-06)",
                    "langchain Pandas DataFrame (gpt-4o-2024-08-06)",
                    "anthropic web page reader (claude-3-5-sonnet-20240620)",
                    "anthropic web page reader (claude-3-haiku-20240307)",
                    "langchain Wolfram Alpha (gpt-4o-2024-08-06)",
                    "langchain alpha-vantage stock agent (gpt-4o-2024-08-06)",
                    "langchain Wolfram Alpha (claude-3-opus-20240229)",
                    "langchain ArXiv Article Fetcher (gpt-4-turbo-2024-04-09)",
                    "langchain alpha-vantage stock agent (claude-3-5-sonnet-20240620)",
                    "langchain Wolfram Alpha (llama-3.1-8B-instruct)",
                    "langchain ArXiv Article Fetcher (llama-3.1-70B-instruct)",
                    "llamaindex wikipedia (gemini-1.5-flash-001)",
                    "langchain Wolfram Alpha (gemini-1.5-pro-001)",
                    "langchain Wolfram Alpha (llama-3.1-70B-instruct)",
                    "llamaindex wikipedia (claude-3-haiku-20240307)",
                    "langchain ArXiv Article Fetcher (gpt-4o-2024-05-13)",
                    "llamaindex wikipedia (llama-3.1-70B-instruct)",
                    "llamaindex wikipedia (llama-3.1-405B-instruct)",
                    "langchain Wolfram Alpha (claude-3-haiku-20240307)",
                    "openai general assistant (gpt-4-0613)",
                    "sql agent plotter langchain (gpt-4o-2024-08-06)",
                    "langchain brave-search agent (claude-3-5-sonnet-20240620)",
                    "langchain brave-search agent (claude-3-haiku-20240307)",
                    "anthropic web page reader (claude-3-opus-20240229)",
                    "langchain Wolfram Alpha (gemini-1.5-flash-001)",
                    "langchain ArXiv Article Fetcher (claude-3-haiku-20240307)",
                    "langchain Wolfram Alpha (gpt-4o-2024-05-13)",
                    "langchain alpha-vantage stock agent (claude-3-opus-20240229)",
                    "llamaindex wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain brave-search agent (llama-3.1-70B-instruct)",
                    "langchain ArXiv Article Fetcher (claude-3-5-sonnet-20240620)",
                    "llamaindex ArXiv Article Fetcher (gpt-4o-2024-08-06)",
                    "sql agent plotter langchain (gpt-4o-2024-05-13)",
                    "langchain brave-search agent (open-mixtral-8x22b)",
                    "langchain Wolfram Alpha (open-mixtral-8x7b)",
                    "llamaindex wikipedia (gpt-4o-2024-08-06)",
                    "sql agent plotter llamaindex (gpt-4o-2024-05-13)",
                    "openai general assistant (gpt-4-turbo-2024-04-09)",
                    "llamaindex wikipedia (gpt-4-0613)",
                    "langchain brave-search agent (llama-3.1-405B-instruct)",
                    "langchain Pandas DataFrame (claude-3-5-sonnet-20240620)",
                    "llamaindex wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain Wolfram Alpha (llama-3.1-405B-instruct)",
                    "langchain Wolfram Alpha (gpt-4-0613)",
                    "langchain Wolfram Alpha (mistral-large-2407)",
                    "langchain brave-search agent (open-mixtral-8x7b)",
                    "llamaindex ArXiv Article Fetcher (claude-3-5-sonnet-20240620)",
                    "langchain brave-search agent (gpt-4-0613)",
                    "langchain brave-search agent (gpt-4o-2024-05-13)",
                    "llamaindex wikipedia (open-mixtral-8x22b)",
                    "langchain brave-search agent (mistral-large-2407)",
                    "langchain Wolfram Alpha (claude-3-5-sonnet-20240620)",
                    "llamaindex wikipedia (open-mixtral-8x7b)",
                    "langchain brave-search agent (gpt-4-turbo-2024-04-09)",
                    "llamaindex wikipedia (claude-3-opus-20240229)",
                    "langchain ArXiv Article Fetcher (gemini-1.5-flash-001)",
                    "langchain ArXiv Article Fetcher (open-mixtral-8x7b)",
                    "langchain ArXiv Article Fetcher (open-mixtral-8x22b)",
                    "llamaindex wikipedia (llama-3.1-8B-instruct)",
                    "langchain Pandas DataFrame (llama-3.1-405B-instruct)",
                    "langchain brave-search agent (claude-3-opus-20240229)",
                    "langchain ArXiv Article Fetcher (gpt-4o-mini-2024-07-18)",
                    "langchain Yahoo Finance News (gpt-4o-2024-08-06)",
                    "langchain Wolfram Alpha (open-mixtral-8x22b)",
                    "llamaindex wikipedia (mistral-large-2407)",
                    "langchain PubMed Biomedical Literature Tool (gpt-4o-2024-08-06)",
                    "langchain ArXiv Article Fetcher (mistral-large-2407)",
                    "openai assistant code interpreter (gpt-4o-2024-05-13)",
                    "langchain PubMed Biomedical Literature Tool (gpt-4o-2024-05-13)",
                    "langchain Wolfram Alpha (gpt-4o-mini-2024-07-18)",
                    "openai general assistant (gpt-4o-2024-05-13)",
                    "langchain Wolfram Alpha (gpt-4-turbo-2024-04-09)",
                    "langchain NASA Toolkit (gpt-4-0613)",
                    "langchain ArXiv Article Fetcher (llama-3.1-8B-instruct)",
                    "langchain brave-search agent (llama-3.1-8B-instruct)",
                    "langchain PubMed Biomedical Literature Tool (claude-3-haiku-20240307)",
                    "llamaindex wikipedia (gpt-4o-mini-2024-07-18)",
                    "openai assistant function calling (gpt-4-turbo-2024-04-09)",
                    "langchain Google Lens (open-mixtral-8x7b)",
                    "langchain alpha-vantage stock agent (llama-3.1-8B-instruct)",
                    "langchain Yahoo Finance News (claude-3-5-sonnet-20240620)",
                    "langchain ArXiv Article Fetcher (llama-3.1-405B-instruct)",
                    "langchain YouTube Search (gemini-1.5-flash-001)",
                    "langchain Python REPL (gpt-4-turbo-2024-04-09)",
                    "langchain You.com Search (gemini-1.5-flash-001)",
                    "langchain alpha-vantage stock agent (llama-3.1-405B-instruct)",
                    "langchain alpha-vantage stock agent (open-mixtral-8x22b)",
                    "langchain Yahoo Finance News (claude-3-haiku-20240307)",
                    "llamaindex ArXiv Article Fetcher (gpt-4o-2024-05-13)",
                    "langchain Yahoo Finance News (gemini-1.5-pro-001)",
                    "langchain Yahoo Finance News (mistral-large-2407)",
                    "langchain Pandas DataFrame (claude-3-haiku-20240307)",
                    "langchain PubMed Biomedical Literature Tool (llama-3.1-405B-instruct)",
                    "llamaindex OpenWeatherMap (mistral-large-2407)",
                    "langchain Pandas DataFrame (gemini-1.5-flash-001)",
                    "sql agent plotter langchain (open-mixtral-8x7b)",
                    "langchain brave-search agent (gemini-1.5-pro-001)",
                    "langchain Python REPL (gemini-1.5-flash-001)",
                    "langchain Python REPL (gpt-4o-2024-08-06)",
                    "langchain NASA Toolkit (mistral-large-2407)",
                    "langchain ArXiv Article Fetcher (claude-3-opus-20240229)",
                    "sql agent plotter llamaindex (gpt-4o-2024-08-06)",
                    "llamaindex wikipedia (gpt-4o-2024-05-13)",
                    "langchain PubMed Biomedical Literature Tool (open-mixtral-8x22b)",
                    "langchain YouTube Search (gpt-4-turbo-2024-04-09)",
                    "langchain Dall-E Image Generator (gpt-4o-2024-08-06)",
                    "openai assistant code interpreter (gpt-4-turbo-2024-04-09)",
                    "langchain Python REPL (claude-3-5-sonnet-20240620)",
                    "langchain NASA Toolkit (gpt-4-turbo-2024-04-09)",
                    "langchain YouTube Search (gpt-4o-2024-05-13)",
                    "langchain Python REPL (llama-3.1-70B-instruct)",
                    "langchain brave-search agent (gpt-4o-mini-2024-07-18)",
                    "langchain brave-search agent (gemini-1.5-flash-001)",
                    "langchain Yahoo Finance News (open-mixtral-8x22b)",
                    "langchain Tavily Search (gpt-4o-2024-05-13)",
                    "sql agent plotter langchain (claude-3-opus-20240229)",
                    "langchain Pandas DataFrame (claude-3-opus-20240229)",
                    "langchain NASA Toolkit (claude-3-5-sonnet-20240620)",
                    "langchain YouTube Search (claude-3-haiku-20240307)",
                    "openai assistant code interpreter (gpt-4o-2024-08-06)",
                    "langchain NASA Toolkit (gpt-4o-2024-05-13)",
                    "langchain PubMed Biomedical Literature Tool (claude-3-opus-20240229)",
                    "sql agent plotter langchain (gemini-1.5-flash-001)",
                    "sql agent plotter langchain (gpt-4-turbo-2024-04-09)",
                    "langchain You.com Search (gpt-4-turbo-2024-04-09)",
                    "langchain PubMed Biomedical Literature Tool (gemini-1.5-pro-001)",
                    "langchain PubMed Biomedical Literature Tool (llama-3.1-8B-instruct)",
                    "sql agent plotter langchain (llama-3.1-405B-instruct)",
                    "sql agent plotter langchain (llama-3.1-70B-instruct)",
                    "langchain alpha-vantage stock agent (llama-3.1-70B-instruct)",
                    "langchain Yahoo Finance News (open-mixtral-8x7b)",
                    "langchain PubMed Biomedical Literature Tool (gpt-4o-mini-2024-07-18)",
                    "langchain PubMed Biomedical Literature Tool (claude-3-5-sonnet-20240620)",
                    "langchain PubMed Biomedical Literature Tool (gpt-4-0613)",
                    "langchain Google Lens (gpt-4o-2024-05-13)",
                    "langchain Google Lens (gpt-4-turbo-2024-04-09)",
                    "langchain alpha-vantage stock agent (claude-3-haiku-20240307)",
                    "langchain YouTube Search (mistral-large-2407)",
                    "langchain PubMed Biomedical Literature Tool (mistral-large-2407)",
                    "crewai AI Crew for Trip Planning (mistral-large-2407)",
                    "llamaindex wikipedia (gemini-1.5-pro-001)",
                    "langchain OpenWeatherMap (gpt-4o-2024-08-06)",
                    "langchain Yahoo Finance News (gpt-4-turbo-2024-04-09)",
                    "sql agent plotter langchain (gpt-4-0613)",
                    "anthropic sql query (claude-3-haiku-20240307)",
                    "langchain ArXiv Article Fetcher (gemini-1.5-pro-001)",
                    "langchain YouTube Search (gemini-1.5-pro-001)",
                    "langchain OpenWeatherMap (claude-3-5-sonnet-20240620)",
                    "crewai AI Crew for Trip Planning (open-mixtral-8x22b)",
                    "langchain You.com Search (claude-3-opus-20240229)",
                    "langchain AskNews (gpt-4o-2024-08-06)",
                    "langchain AskNews (llama-3.1-405B-instruct)",
                    "langchain Google Lens (llama-3.1-8B-instruct)",
                    "langchain PubMed Biomedical Literature Tool (gpt-4-turbo-2024-04-09)",
                    "langchain Yahoo Finance News (claude-3-opus-20240229)",
                    "langchain You.com Search (mistral-large-2407)",
                    "langchain PubMed Biomedical Literature Tool (llama-3.1-70B-instruct)",
                    "langchain Tavily Search (gpt-4o-2024-08-06)",
                    "langchain Google Lens (claude-3-haiku-20240307)",
                    "llamaindex OpenWeatherMap (llama-3.1-70B-instruct)",
                    "langchain Google Jobs (gpt-4o-2024-08-06)",
                    "langchain Yahoo Finance News (gpt-4-0613)",
                    "langchain Python REPL (gpt-4-0613)",
                    "anthropic customer service agent (claude-3-opus-20240229)",
                    "llamaindex Yahoo Finance News (claude-3-haiku-20240307)",
                    "llamaindex ArXiv Article Fetcher (mistral-large-2407)",
                    "langchain AskNews (llama-3.1-70B-instruct)",
                    "openai assistant customer support chatbot (gpt-4-0613)",
                    "langchain Eden AI Integration (gpt-4o-2024-08-06)",
                    "langchain Dall-E Image Generator (open-mixtral-8x22b)",
                    "sql agent plotter llamaindex (gpt-4-0613)",
                    "langchain Google Jobs (gpt-4-turbo-2024-04-09)",
                    "llamaindex ArXiv Article Fetcher (llama-3.1-70B-instruct)",
                    "llamaindex Yahoo Finance News (gpt-4-turbo-2024-04-09)",
                    "langchain YouTube Search (claude-3-5-sonnet-20240620)",
                    "langchain Tavily Search (claude-3-5-sonnet-20240620)",
                    "langchain Dall-E Image Generator (llama-3.1-8B-instruct)",
                    "crewai AI Crew for Trip Planning (claude-3-haiku-20240307)",
                    "langchain NASA Toolkit (open-mixtral-8x22b)",
                    "llamaindex ArXiv Article Fetcher (open-mixtral-8x22b)",
                    "langchain PubMed Biomedical Literature Tool (open-mixtral-8x7b)",
                    "langchain Shell (claude-3-5-sonnet-20240620)",
                    "langchain Tavily Search (mistral-large-2407)",
                    "langchain OpenWeatherMap (llama-3.1-405B-instruct)",
                    "langchain YouTube Search (llama-3.1-8B-instruct)",
                    "langchain You.com Search (gpt-4o-2024-08-06)",
                    "langchain Python REPL (claude-3-opus-20240229)",
                    "crewai AI Crew for Game Building (gpt-4o-2024-08-06)",
                    "langchain YouTube Search (claude-3-opus-20240229)",
                    "langchain OpenWeatherMap (claude-3-opus-20240229)",
                    "langchain You.com Search (gpt-4o-mini-2024-07-18)",
                    "llamaindex ArXiv Article Fetcher (gpt-4-0613)",
                    "langchain Yahoo Finance News (gpt-4o-2024-05-13)",
                    "langchain OpenWeatherMap (gemini-1.5-flash-001)",
                    "langchain YouTube Search (llama-3.1-70B-instruct)",
                    "langchain You.com Search (open-mixtral-8x7b)",
                    "langchain Dall-E Image Generator (claude-3-haiku-20240307)",
                    "langchain Pandas DataFrame (open-mixtral-8x22b)",
                    "langchain Yahoo Finance News (gpt-4o-mini-2024-07-18)",
                    "langchain Python REPL (open-mixtral-8x22b)",
                    "langchain YouTube Search (gpt-4-0613)",
                    "langchain Python REPL (open-mixtral-8x7b)",
                    "langchain OpenWeatherMap (gpt-4o-mini-2024-07-18)",
                    "crewai AI Crew for Trip Planning (llama-3.1-70B-instruct)",
                    "langchain Python REPL (gpt-4o-mini-2024-07-18)",
                    "langchain You.com Search (gpt-4-0613)",
                    "langchain Python REPL (claude-3-haiku-20240307)",
                    "sql agent plotter langchain (llama-3.1-8B-instruct)",
                    "langchain Python REPL (gpt-4o-2024-05-13)",
                    "langchain AskNews (claude-3-5-sonnet-20240620)",
                    "llamaindex OpenWeatherMap (claude-3-haiku-20240307)",
                    "llamaindex OpenWeatherMap (llama-3.1-8B-instruct)",
                    "sql agent plotter llamaindex (claude-3-5-sonnet-20240620)",
                    "llamaindex OpenWeatherMap (gpt-4-0613)",
                    "langchain Pandas DataFrame (gemini-1.5-pro-001)",
                    "langchain NASA Toolkit (open-mixtral-8x7b)",
                    "openai assistant code interpreter (gpt-4-0613)",
                    "llamaindex Yelp Tool (claude-3-haiku-20240307)",
                    "langchain You.com Search (gemini-1.5-pro-001)",
                    "llamaindex OpenWeatherMap (gemini-1.5-pro-001)",
                    "langchain Google Lens (claude-3-5-sonnet-20240620)",
                    "llamaindex OpenWeatherMap (claude-3-5-sonnet-20240620)",
                    "langchain AskNews (gemini-1.5-flash-001)",
                    "langchain You.com Search (claude-3-haiku-20240307)",
                    "langchain You.com Search (open-mixtral-8x22b)",
                    "langchain YouTube Search (gpt-4o-mini-2024-07-18)",
                    "langchain AskNews (open-mixtral-8x7b)",
                    "langchain You.com Search (llama-3.1-70B-instruct)",
                    "langchain AskNews (gpt-4-turbo-2024-04-09)",
                    "llamaindex code interpreter (gemini-1.5-pro-001)",
                    "langchain NASA Toolkit (llama-3.1-70B-instruct)",
                    "llamaindex ArXiv Article Fetcher (open-mixtral-8x7b)",
                    "langchain YouTube Search (llama-3.1-405B-instruct)",
                    "langchain Tavily Search (llama-3.1-405B-instruct)",
                    "llamaindex ArXiv Article Fetcher (llama-3.1-405B-instruct)",
                    "llamaindex Yahoo Finance News (gemini-1.5-pro-001)",
                    "llamaindex brave-search agent (open-mixtral-8x22b)",
                    "crewai AI Crew for Trip Planning (gpt-4o-2024-05-13)",
                    "langchain Google Jobs (open-mixtral-8x7b)",
                    "crewai AI Crew for Trip Planning (gemini-1.5-pro-001)",
                    "llamaindex code interpreter (gpt-4o-mini-2024-07-18)",
                    "langchain Tavily Search (gemini-1.5-flash-001)",
                    "llamaindex code interpreter (gpt-4o-2024-05-13)",
                    "langchain NASA Toolkit (gpt-4o-2024-08-06)",
                    "langchain Yahoo Finance News (llama-3.1-8B-instruct)",
                    "openai assistant function calling (gpt-4-0613)",
                    "langchain OpenWeatherMap (gpt-4o-2024-05-13)",
                    "langchain Google Jobs (llama-3.1-405B-instruct)",
                    "langchain OpenWeatherMap (gemini-1.5-pro-001)",
                    "langchain Google Lens (gpt-4-0613)",
                    "langchain Dall-E Image Generator (gpt-4-turbo-2024-04-09)",
                    "langchain Tavily Search (open-mixtral-8x7b)",
                    "langchain YouTube Search (open-mixtral-8x22b)",
                    "langchain alpha-vantage stock agent (gpt-4o-2024-05-13)",
                    "sql agent plotter langchain (mistral-large-2407)",
                    "anthropic pdf upload summarization (claude-3-opus-20240229)",
                    "langchain OpenWeatherMap (claude-3-haiku-20240307)",
                    "sql agent plotter langchain (gemini-1.5-pro-001)",
                    "langchain alpha-vantage stock agent (mistral-large-2407)",
                    "langchain OpenWeatherMap (llama-3.1-8B-instruct)",
                    "langchain alpha-vantage stock agent (open-mixtral-8x7b)",
                    "langchain Riza Code Interpreter (gpt-4o-2024-05-13)",
                    "llamaindex ArXiv Article Fetcher (gpt-4o-mini-2024-07-18)",
                    "langchain alpha-vantage stock agent (gpt-4o-mini-2024-07-18)",
                    "sql agent plotter langchain (claude-3-haiku-20240307)",
                    "crewai AI Crew for Trip Planning (claude-3-5-sonnet-20240620)",
                    "crewai AI Crew for Trip Planning (gpt-4o-2024-08-06)",
                    "langchain Google Jobs (claude-3-5-sonnet-20240620)",
                    "langchain NASA Toolkit (llama-3.1-8B-instruct)",
                    "anthropic sql query (claude-3-opus-20240229)",
                    "openai assistant function calling (gpt-4o-2024-05-13)",
                    "llamaindex OpenWeatherMap (claude-3-opus-20240229)",
                    "anthropic sql query (claude-3-5-sonnet-20240620)",
                    "langchain alpha-vantage stock agent (gemini-1.5-pro-001)",
                    "langchain OpenWeatherMap (open-mixtral-8x7b)",
                    "crewai Meeting Preparation Agent Crew (llama-3.1-405B-instruct)",
                    "langchain Dall-E Image Generator (gpt-4-0613)",
                    "openai assistant function calling (gpt-4o-mini-2024-07-18)",
                    "langchain Google Jobs (gpt-4-0613)",
                    "llamaindex OpenWeatherMap (open-mixtral-8x22b)",
                    "langchain Dall-E Image Generator (gpt-4o-2024-05-13)",
                    "langchain Google Lens (gpt-4o-2024-08-06)",
                    "llamaindex Yahoo Finance News (gpt-4o-2024-05-13)",
                    "langchain You.com Search (gpt-4o-2024-05-13)",
                    "langchain Golden Query Integration (gpt-4o-2024-08-06)",
                    "langchain Python REPL (llama-3.1-8B-instruct)",
                    "sql agent plotter llamaindex (llama-3.1-8B-instruct)",
                    "langchain NASA Toolkit (gemini-1.5-flash-001)",
                    "sql agent plotter llamaindex (open-mixtral-8x7b)",
                    "sql agent plotter langchain (claude-3-5-sonnet-20240620)",
                    "langchain Google Jobs (gpt-4o-2024-05-13)",
                    "langchain alpha-vantage stock agent (gpt-4-0613)",
                    "llamaindex OpenWeatherMap (gpt-4-turbo-2024-04-09)",
                    "sql agent plotter llamaindex (gemini-1.5-pro-001)",
                    "langchain Pandas DataFrame (llama-3.1-70B-instruct)",
                    "sql agent plotter llamaindex (mistral-large-2407)",
                    "langchain GraphQL API Integration (gpt-4o-2024-05-13)",
                    "langchain NASA Toolkit (claude-3-opus-20240229)",
                    "langchain Golden Query Integration (claude-3-opus-20240229)",
                    "langchain Golden Query Integration (mistral-large-2407)",
                    "langchain Yahoo Finance News (llama-3.1-405B-instruct)",
                    "langchain Golden Query Integration (open-mixtral-8x7b)",
                    "llamaindex Tavily Research Tool (open-mixtral-8x7b)",
                    "langchain Eden AI Integration (gemini-1.5-flash-001)",
                    "llamaindex OpenWeatherMap (gemini-1.5-flash-002)",
                    "langchain Golden Query Integration (claude-3-haiku-20240307)",
                    "crewai AI Crew for Game Building (mistral-large-2407)",
                    "crewai Meeting Preparation Agent Crew (gpt-4o-2024-08-06)",
                    "openai assistant customer support chatbot (gpt-4o-2024-05-13)",
                    "crewai AI Crew for Game Building (gpt-4-turbo-2024-04-09)",
                    "langchain Exa Search Integration (gpt-4-0613)",
                    "langchain Google Jobs (gemini-1.5-pro-001)",
                    "llamaindex Yelp Tool (open-mixtral-8x22b)",
                    "langchain Dall-E Image Generator (claude-3-5-sonnet-20240620)",
                    "langchain Pandas DataFrame (gpt-4o-2024-05-13)",
                    "llamaindex Yelp Tool (gpt-4o-2024-05-13)",
                    "langchain YouTube Search (open-mixtral-8x7b)",
                    "langchain Google Jobs (gpt-4o-mini-2024-07-18)",
                    "langchain OpenWeatherMap (open-mixtral-8x22b)",
                    "langchain Golden Query Integration (gpt-4o-mini-2024-07-18)",
                    "langchain Python REPL (llama-3.1-405B-instruct)",
                    "langchain Exa Search Integration (gemini-1.5-pro-001)",
                    "langchain AskNews (llama-3.1-8B-instruct)",
                    "llamaindex Yelp Tool (llama-3.1-8B-instruct)",
                    "llamaindex code interpreter (gpt-4-turbo-2024-04-09)",
                    "langchain You.com Search (claude-3-5-sonnet-20240620)",
                    "llamaindex ArXiv Article Fetcher (llama-3.1-8B-instruct)",
                    "llamaindex code interpreter (claude-3-5-sonnet-20240620)",
                    "llamaindex Yelp Tool (llama-3.1-70B-instruct)",
                    "langchain Tavily Search (gpt-4-turbo-2024-04-09)",
                    "langchain AskNews (mistral-large-2407)",
                    "langchain Gmail Toolkit (mistral-large-2407)",
                    "langchain You.com Search (llama-3.1-405B-instruct)",
                    "langchain AskNews (claude-3-opus-20240229)",
                    "llamaindex Wolfram Alpha (open-mixtral-8x22b)",
                    "langchain Shell (gpt-4o-mini-2024-07-18)",
                    "langchain AskNews (gpt-4o-2024-05-13)",
                    "crewai Meeting Preparation Agent Crew (gemini-1.5-pro-001)",
                    "sql agent plotter llamaindex (gpt-4-turbo-2024-04-09)",
                    "sql agent plotter llamaindex (llama-3.1-70B-instruct)",
                    "langchain Exa Search Integration (gpt-4-turbo-2024-04-09)",
                    "anthropic customer service agent (claude-3-5-sonnet-20240620)",
                    "langchain Dall-E Image Generator (mistral-large-2407)",
                    "langchain Golden Query Integration (llama-3.1-70B-instruct)",
                    "crewai AI Crew for Game Building (open-mixtral-8x7b)",
                    "langchain Riza Code Interpreter (claude-3-5-sonnet-20240620)",
                    "crewai AI Crew for Game Building (gemini-1.5-flash-001)",
                    "llamaindex brave-search agent (claude-3-opus-20240229)",
                    "llamaindex Yelp Tool (claude-3-opus-20240229)",
                    "langchain alpha-vantage stock agent (gemini-1.5-pro-002)",
                    "crewai AI Crew for Trip Planning (gpt-4-0613)",
                    "langchain GraphQL API Integration (gpt-4-0613)",
                    "llamaindex brave-search agent (gemini-1.5-pro-001)",
                    "langchain Google Lens (llama-3.1-405B-instruct)",
                    "llamaindex Exa Search Integration (gpt-4o-2024-08-06)",
                    "langchain Riza Code Interpreter (claude-3-opus-20240229)",
                    "langchain Eden AI Integration (open-mixtral-8x22b)",
                    "langchain Dall-E Image Generator (claude-3-opus-20240229)",
                    "langchain Dall-E Image Generator (open-mixtral-8x7b)",
                    "llamaindex Yahoo Finance News (claude-3-5-sonnet-20240620)",
                    "langchain Riza Code Interpreter (open-mixtral-8x22b)",
                    "langchain Eden AI Integration (mistral-large-2407)",
                    "llamaindex Yahoo Finance News (llama-3.1-8B-instruct)",
                    "sql agent plotter llamaindex (claude-3-opus-20240229)",
                    "langchain Riza Code Interpreter (open-mixtral-8x7b)",
                    "llamaindex OpenAPI Tool (gpt-4o-2024-08-06)",
                    "llamaindex OpenWeatherMap (open-mixtral-8x7b)",
                    "llamaindex Yahoo Finance News (llama-3.1-405B-instruct)",
                    "langchain Yahoo Finance News (llama-3.1-70B-instruct)",
                    "crewai AI Crew for Trip Planning (gemini-1.5-pro-002)",
                    "llamaindex Yahoo Finance News (claude-3-opus-20240229)",
                    "langchain Google Lens (llama-3.1-70B-instruct)",
                    "crewai AI Crew for Trip Planning (claude-3-opus-20240229)",
                    "langchain Dall-E Image Generator (llama-3.1-70B-instruct)",
                    "langchain Dall-E Image Generator (gpt-4o-mini-2024-07-18)",
                    "crewai AI Crew for Game Building (gpt-4o-2024-05-13)",
                    "crewai AI Crew for Game Building (llama-3.1-8B-instruct)",
                    "llamaindex Wolfram Alpha (gpt-4o-2024-08-06)",
                    "llamaindex Wolfram Alpha (llama-3.1-70B-instruct)",
                    "anthropic pdf upload summarization (claude-3-haiku-20240307)",
                    "llamaindex Yahoo Finance News (mistral-large-2407)",
                    "langchain Gmail Toolkit (gemini-1.5-flash-001)",
                    "crewai AI Crew for Trip Planning (llama-3.1-8B-instruct)",
                    "llamaindex Wolfram Alpha (gpt-4-0613)",
                    "langchain Shell (gpt-4o-2024-05-13)",
                    "langchain Riza Code Interpreter (llama-3.1-405B-instruct)",
                    "langchain Python REPL (mistral-large-2407)",
                    "langchain Yahoo Finance News (gemini-1.5-flash-001)",
                    "llamaindex ArXiv Article Fetcher (gemini-1.5-flash-001)",
                    "langchain Pandas DataFrame (mistral-large-2407)",
                    "langchain Google Lens (gemini-1.5-pro-001)",
                    "langchain Pandas DataFrame (llama-3.1-8B-instruct)",
                    "langchain Dall-E Image Generator (gemini-1.5-pro-001)",
                    "langchain Exa Search Integration (open-mixtral-8x22b)",
                    "langchain Tavily Search (gpt-4-0613)",
                    "langchain Exa Search Integration (claude-3-5-sonnet-20240620)",
                    "sql agent plotter langchain (open-mixtral-8x22b)",
                    "langchain Google Lens (mistral-large-2407)",
                    "llamaindex Yahoo Finance News (gemini-1.5-flash-001)",
                    "crewai AI Crew for Game Building (gpt-4-0613)",
                    "langchain OpenWeatherMap (mistral-large-2407)",
                    "anthropic pdf upload summarization (claude-3-5-sonnet-20240620)",
                    "langchain Google Lens (gpt-4o-mini-2024-07-18)",
                    "llamaindex ArXiv Article Fetcher (gemini-1.5-flash-002)"
                  ],
                  "xaxis": "x",
                  "y": [
                    178,
                    101,
                    97,
                    81,
                    77,
                    77,
                    74,
                    73,
                    72,
                    71,
                    70,
                    70,
                    68,
                    68,
                    67,
                    66,
                    65,
                    64,
                    64,
                    63,
                    63,
                    61,
                    60,
                    59,
                    58,
                    58,
                    56,
                    56,
                    55,
                    54,
                    53,
                    52,
                    51,
                    49,
                    39,
                    34,
                    31,
                    29,
                    26,
                    25,
                    24,
                    24,
                    22,
                    20,
                    19,
                    18,
                    17,
                    17,
                    17,
                    16,
                    16,
                    16,
                    16,
                    14,
                    14,
                    14,
                    14,
                    14,
                    14,
                    13,
                    13,
                    13,
                    12,
                    12,
                    12,
                    12,
                    12,
                    12,
                    11,
                    11,
                    11,
                    11,
                    10,
                    10,
                    10,
                    10,
                    10,
                    10,
                    10,
                    10,
                    10,
                    10,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    9,
                    8,
                    8,
                    8,
                    8,
                    8,
                    8,
                    8,
                    8,
                    8,
                    8,
                    8,
                    7,
                    7,
                    7,
                    7,
                    7,
                    7,
                    7,
                    7,
                    7,
                    7,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    6,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    5,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    4,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    3,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    2,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1,
                    1
                  ],
                  "yaxis": "y"
                }
              ],
              "layout": {
                "barmode": "relative",
                "height": 400,
                "legend": {
                  "tracegroupgap": 0
                },
                "showlegend": false,
                "template": {
                  "data": {
                    "bar": [
                      {
                        "error_x": {
                          "color": "#2a3f5f"
                        },
                        "error_y": {
                          "color": "#2a3f5f"
                        },
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "bar"
                      }
                    ],
                    "barpolar": [
                      {
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "barpolar"
                      }
                    ],
                    "carpet": [
                      {
                        "aaxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "baxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "type": "carpet"
                      }
                    ],
                    "choropleth": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "choropleth"
                      }
                    ],
                    "contour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "contour"
                      }
                    ],
                    "contourcarpet": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "contourcarpet"
                      }
                    ],
                    "heatmap": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmap"
                      }
                    ],
                    "heatmapgl": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmapgl"
                      }
                    ],
                    "histogram": [
                      {
                        "marker": {
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "histogram"
                      }
                    ],
                    "histogram2d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2d"
                      }
                    ],
                    "histogram2dcontour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2dcontour"
                      }
                    ],
                    "mesh3d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "mesh3d"
                      }
                    ],
                    "parcoords": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "parcoords"
                      }
                    ],
                    "pie": [
                      {
                        "automargin": true,
                        "type": "pie"
                      }
                    ],
                    "scatter": [
                      {
                        "fillpattern": {
                          "fillmode": "overlay",
                          "size": 10,
                          "solidity": 0.2
                        },
                        "type": "scatter"
                      }
                    ],
                    "scatter3d": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatter3d"
                      }
                    ],
                    "scattercarpet": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattercarpet"
                      }
                    ],
                    "scattergeo": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergeo"
                      }
                    ],
                    "scattergl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergl"
                      }
                    ],
                    "scattermapbox": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattermapbox"
                      }
                    ],
                    "scatterpolar": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolar"
                      }
                    ],
                    "scatterpolargl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolargl"
                      }
                    ],
                    "scatterternary": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterternary"
                      }
                    ],
                    "surface": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "surface"
                      }
                    ],
                    "table": [
                      {
                        "cells": {
                          "fill": {
                            "color": "#EBF0F8"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "header": {
                          "fill": {
                            "color": "#C8D4E3"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "type": "table"
                      }
                    ]
                  },
                  "layout": {
                    "annotationdefaults": {
                      "arrowcolor": "#2a3f5f",
                      "arrowhead": 0,
                      "arrowwidth": 1
                    },
                    "autotypenumbers": "strict",
                    "coloraxis": {
                      "colorbar": {
                        "outlinewidth": 0,
                        "ticks": ""
                      }
                    },
                    "colorscale": {
                      "diverging": [
                        [
                          0,
                          "#8e0152"
                        ],
                        [
                          0.1,
                          "#c51b7d"
                        ],
                        [
                          0.2,
                          "#de77ae"
                        ],
                        [
                          0.3,
                          "#f1b6da"
                        ],
                        [
                          0.4,
                          "#fde0ef"
                        ],
                        [
                          0.5,
                          "#f7f7f7"
                        ],
                        [
                          0.6,
                          "#e6f5d0"
                        ],
                        [
                          0.7,
                          "#b8e186"
                        ],
                        [
                          0.8,
                          "#7fbc41"
                        ],
                        [
                          0.9,
                          "#4d9221"
                        ],
                        [
                          1,
                          "#276419"
                        ]
                      ],
                      "sequential": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ],
                      "sequentialminus": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ]
                    },
                    "colorway": [
                      "#636efa",
                      "#EF553B",
                      "#00cc96",
                      "#ab63fa",
                      "#FFA15A",
                      "#19d3f3",
                      "#FF6692",
                      "#B6E880",
                      "#FF97FF",
                      "#FECB52"
                    ],
                    "font": {
                      "color": "#2a3f5f"
                    },
                    "geo": {
                      "bgcolor": "white",
                      "lakecolor": "white",
                      "landcolor": "#E5ECF6",
                      "showlakes": true,
                      "showland": true,
                      "subunitcolor": "white"
                    },
                    "hoverlabel": {
                      "align": "left"
                    },
                    "hovermode": "closest",
                    "mapbox": {
                      "style": "light"
                    },
                    "paper_bgcolor": "white",
                    "plot_bgcolor": "#E5ECF6",
                    "polar": {
                      "angularaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "radialaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "scene": {
                      "xaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "yaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "zaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      }
                    },
                    "shapedefaults": {
                      "line": {
                        "color": "#2a3f5f"
                      }
                    },
                    "ternary": {
                      "aaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "baxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "caxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "title": {
                      "x": 0.05
                    },
                    "xaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    },
                    "yaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    }
                  }
                },
                "title": {
                  "text": "Battle Count for Each Agent"
                },
                "xaxis": {
                  "anchor": "y",
                  "domain": [
                    0,
                    1
                  ],
                  "title": {
                    "text": "Agent"
                  }
                },
                "yaxis": {
                  "anchor": "x",
                  "domain": [
                    0,
                    1
                  ],
                  "title": {
                    "text": "Battle Count"
                  }
                }
              }
            }
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "import plotly.express as px\n",
        "\n",
        "# Extract agent names from both Agent_A and Agent_B\n",
        "agent_names = ratings['Agent_A'].apply(lambda x: x['Agent name']).tolist() + \\\n",
        "              ratings['Agent_B'].apply(lambda x: x['Agent name']).tolist()\n",
        "\n",
        "# Count the occurrences of each agent\n",
        "agent_counts = pd.Series(agent_names).value_counts()\n",
        "\n",
        "# Create the bar plot\n",
        "fig = px.bar(agent_counts,\n",
        "             x=agent_counts.index,\n",
        "             y=agent_counts.values,\n",
        "             title=\"Battle Count for Each Agent\",\n",
        "             text_auto=True)\n",
        "\n",
        "fig.update_layout(xaxis_title=\"Agent\",\n",
        "                  yaxis_title=\"Battle Count\",\n",
        "                  height=400,\n",
        "                  showlegend=False)\n",
        "\n",
        "fig.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 127,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 817
        },
        "id": "5RZa85rmVyys",
        "outputId": "7a9a24f1-71f9-4777-e74d-e1778bfeddb5"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.plotly.v1+json": {
              "config": {
                "plotlyServerURL": "https://plot.ly"
              },
              "data": [
                {
                  "coloraxis": "coloraxis",
                  "hovertemplate": "Agent A: %{y}<br>Agent B: %{x}<br>Count: %{z}<extra></extra>",
                  "name": "0",
                  "texttemplate": "%{z}",
                  "type": "heatmap",
                  "x": [
                    "langchain Wikipedia (gpt-4o-2024-08-06)",
                    "langchain google-serper search agent (gpt-4o-2024-08-06)",
                    "llamaindex brave-search agent (gpt-4o-2024-08-06)",
                    "openai assistant function calling (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (llama-3.1-70B-instruct)",
                    "langchain brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (gpt-4o-mini-2024-07-18)",
                    "langchain Wikipedia (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (open-mixtral-8x22b)",
                    "langchain Wikipedia (open-mixtral-8x7b)",
                    "langchain Wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (gemini-1.5-pro-001)",
                    "langchain Wikipedia (gemini-1.5-pro-001)",
                    "langchain google-serper search agent (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (claude-3-haiku-20240307)",
                    "langchain google-serper search agent (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (llama-3.1-405B-instruct)",
                    "langchain YouTube Search (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (claude-3-opus-20240229)",
                    "langchain google-serper search agent (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (llama-3.1-405B-instruct)",
                    "langchain google-serper search agent (mistral-large-2407)",
                    "langchain Wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (gpt-4-0613)",
                    "langchain google-serper search agent (gpt-4-0613)",
                    "langchain google-serper search agent (claude-3-opus-20240229)",
                    "langchain google-serper search agent (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (open-mixtral-8x7b)",
                    "langchain alpha-vantage stock agent (gpt-4-turbo-2024-04-09)"
                  ],
                  "xaxis": "x",
                  "y": [
                    "langchain Wikipedia (gpt-4o-2024-08-06)",
                    "langchain google-serper search agent (gpt-4o-2024-08-06)",
                    "llamaindex brave-search agent (gpt-4o-2024-08-06)",
                    "openai assistant function calling (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (llama-3.1-70B-instruct)",
                    "langchain brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (gpt-4o-mini-2024-07-18)",
                    "langchain Wikipedia (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (open-mixtral-8x22b)",
                    "langchain Wikipedia (open-mixtral-8x7b)",
                    "langchain Wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (gemini-1.5-pro-001)",
                    "langchain Wikipedia (gemini-1.5-pro-001)",
                    "langchain google-serper search agent (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (claude-3-haiku-20240307)",
                    "langchain google-serper search agent (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (llama-3.1-405B-instruct)",
                    "langchain YouTube Search (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (claude-3-opus-20240229)",
                    "langchain google-serper search agent (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (llama-3.1-405B-instruct)",
                    "langchain google-serper search agent (mistral-large-2407)",
                    "langchain Wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (gpt-4-0613)",
                    "langchain google-serper search agent (gpt-4-0613)",
                    "langchain google-serper search agent (claude-3-opus-20240229)",
                    "langchain google-serper search agent (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (open-mixtral-8x7b)",
                    "langchain alpha-vantage stock agent (gpt-4-turbo-2024-04-09)"
                  ],
                  "yaxis": "y",
                  "z": [
                    [
                      2,
                      2,
                      6,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0
                    ],
                    [
                      2,
                      0,
                      2,
                      0,
                      0,
                      9,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      6,
                      2,
                      0,
                      2,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      2,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0
                    ],
                    [
                      0,
                      9,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      1,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      1,
                      0,
                      1,
                      0,
                      2,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0
                    ],
                    [
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      2,
                      2,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0
                    ],
                    [
                      2,
                      0,
                      3,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ]
                  ]
                }
              ],
              "layout": {
                "coloraxis": {
                  "colorscale": [
                    [
                      0,
                      "#0d0887"
                    ],
                    [
                      0.1111111111111111,
                      "#46039f"
                    ],
                    [
                      0.2222222222222222,
                      "#7201a8"
                    ],
                    [
                      0.3333333333333333,
                      "#9c179e"
                    ],
                    [
                      0.4444444444444444,
                      "#bd3786"
                    ],
                    [
                      0.5555555555555556,
                      "#d8576b"
                    ],
                    [
                      0.6666666666666666,
                      "#ed7953"
                    ],
                    [
                      0.7777777777777778,
                      "#fb9f3a"
                    ],
                    [
                      0.8888888888888888,
                      "#fdca26"
                    ],
                    [
                      1,
                      "#f0f921"
                    ]
                  ]
                },
                "font": {
                  "size": 10
                },
                "height": 800,
                "template": {
                  "data": {
                    "bar": [
                      {
                        "error_x": {
                          "color": "#2a3f5f"
                        },
                        "error_y": {
                          "color": "#2a3f5f"
                        },
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "bar"
                      }
                    ],
                    "barpolar": [
                      {
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "barpolar"
                      }
                    ],
                    "carpet": [
                      {
                        "aaxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "baxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "type": "carpet"
                      }
                    ],
                    "choropleth": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "choropleth"
                      }
                    ],
                    "contour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "contour"
                      }
                    ],
                    "contourcarpet": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "contourcarpet"
                      }
                    ],
                    "heatmap": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmap"
                      }
                    ],
                    "heatmapgl": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmapgl"
                      }
                    ],
                    "histogram": [
                      {
                        "marker": {
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "histogram"
                      }
                    ],
                    "histogram2d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2d"
                      }
                    ],
                    "histogram2dcontour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2dcontour"
                      }
                    ],
                    "mesh3d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "mesh3d"
                      }
                    ],
                    "parcoords": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "parcoords"
                      }
                    ],
                    "pie": [
                      {
                        "automargin": true,
                        "type": "pie"
                      }
                    ],
                    "scatter": [
                      {
                        "fillpattern": {
                          "fillmode": "overlay",
                          "size": 10,
                          "solidity": 0.2
                        },
                        "type": "scatter"
                      }
                    ],
                    "scatter3d": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatter3d"
                      }
                    ],
                    "scattercarpet": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattercarpet"
                      }
                    ],
                    "scattergeo": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergeo"
                      }
                    ],
                    "scattergl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergl"
                      }
                    ],
                    "scattermapbox": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattermapbox"
                      }
                    ],
                    "scatterpolar": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolar"
                      }
                    ],
                    "scatterpolargl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolargl"
                      }
                    ],
                    "scatterternary": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterternary"
                      }
                    ],
                    "surface": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "surface"
                      }
                    ],
                    "table": [
                      {
                        "cells": {
                          "fill": {
                            "color": "#EBF0F8"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "header": {
                          "fill": {
                            "color": "#C8D4E3"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "type": "table"
                      }
                    ]
                  },
                  "layout": {
                    "annotationdefaults": {
                      "arrowcolor": "#2a3f5f",
                      "arrowhead": 0,
                      "arrowwidth": 1
                    },
                    "autotypenumbers": "strict",
                    "coloraxis": {
                      "colorbar": {
                        "outlinewidth": 0,
                        "ticks": ""
                      }
                    },
                    "colorscale": {
                      "diverging": [
                        [
                          0,
                          "#8e0152"
                        ],
                        [
                          0.1,
                          "#c51b7d"
                        ],
                        [
                          0.2,
                          "#de77ae"
                        ],
                        [
                          0.3,
                          "#f1b6da"
                        ],
                        [
                          0.4,
                          "#fde0ef"
                        ],
                        [
                          0.5,
                          "#f7f7f7"
                        ],
                        [
                          0.6,
                          "#e6f5d0"
                        ],
                        [
                          0.7,
                          "#b8e186"
                        ],
                        [
                          0.8,
                          "#7fbc41"
                        ],
                        [
                          0.9,
                          "#4d9221"
                        ],
                        [
                          1,
                          "#276419"
                        ]
                      ],
                      "sequential": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ],
                      "sequentialminus": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ]
                    },
                    "colorway": [
                      "#636efa",
                      "#EF553B",
                      "#00cc96",
                      "#ab63fa",
                      "#FFA15A",
                      "#19d3f3",
                      "#FF6692",
                      "#B6E880",
                      "#FF97FF",
                      "#FECB52"
                    ],
                    "font": {
                      "color": "#2a3f5f"
                    },
                    "geo": {
                      "bgcolor": "white",
                      "lakecolor": "white",
                      "landcolor": "#E5ECF6",
                      "showlakes": true,
                      "showland": true,
                      "subunitcolor": "white"
                    },
                    "hoverlabel": {
                      "align": "left"
                    },
                    "hovermode": "closest",
                    "mapbox": {
                      "style": "light"
                    },
                    "paper_bgcolor": "white",
                    "plot_bgcolor": "#E5ECF6",
                    "polar": {
                      "angularaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "radialaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "scene": {
                      "xaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "yaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "zaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      }
                    },
                    "shapedefaults": {
                      "line": {
                        "color": "#2a3f5f"
                      }
                    },
                    "ternary": {
                      "aaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "baxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "caxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "title": {
                      "x": 0.05
                    },
                    "xaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    },
                    "yaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    }
                  }
                },
                "title": {
                  "text": "Tie Count for Each Combination of Agents",
                  "x": 0.5,
                  "y": 0.07
                },
                "width": 800,
                "xaxis": {
                  "anchor": "y",
                  "constrain": "domain",
                  "domain": [
                    0,
                    1
                  ],
                  "scaleanchor": "y",
                  "side": "top",
                  "title": {
                    "text": "Agent B"
                  }
                },
                "yaxis": {
                  "anchor": "x",
                  "autorange": "reversed",
                  "constrain": "domain",
                  "domain": [
                    0,
                    1
                  ],
                  "title": {
                    "text": "Agent A"
                  }
                }
              }
            }
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "import plotly.express as px\n",
        "import pandas as pd\n",
        "\n",
        "def visualize_battle_count(battles, title, show_num_models=30):\n",
        "    # Extract agent names and create a DataFrame\n",
        "    battle_df = pd.DataFrame({\n",
        "        'leftAgent': battles['Agent_A'].apply(lambda x: x['Agent name']),\n",
        "        'rightAgent': battles['Agent_B'].apply(lambda x: x['Agent name']),\n",
        "        'rating': battles['Rating']\n",
        "    })\n",
        "\n",
        "    # Create the pivot table\n",
        "    ptbl = pd.pivot_table(battle_df, index=\"leftAgent\", columns=\"rightAgent\", aggfunc=\"size\", fill_value=0)\n",
        "\n",
        "    # Get all unique agents from both leftAgent and rightAgent columns\n",
        "    all_agents = pd.Index(ptbl.index.union(ptbl.columns))\n",
        "\n",
        "    # Reindex the pivot table to be square, filling missing values with 0\n",
        "    ptbl = ptbl.reindex(index=all_agents, columns=all_agents, fill_value=0)\n",
        "\n",
        "    # Sum the pivot table and its transpose to get symmetric battle counts\n",
        "    battle_counts = ptbl + ptbl.T\n",
        "\n",
        "    # Fill any remaining NaN values with 0 (just in case)\n",
        "    battle_counts.fillna(0, inplace=True)\n",
        "\n",
        "    # Sort the agents by the sum of their counts and get the top N agents\n",
        "    ordering = battle_counts.sum().sort_values(ascending=False).index\n",
        "    ordering = ordering[:show_num_models]  # Limit to top N agents\n",
        "\n",
        "    # Extract the ordered battle counts matrix\n",
        "    ordered_battle_counts = battle_counts.loc[ordering, ordering]\n",
        "\n",
        "    # Generate the heatmap\n",
        "    fig = px.imshow(ordered_battle_counts,\n",
        "                    title=title, text_auto=True)\n",
        "    fig.update_layout(xaxis_title=\"Agent B\",\n",
        "                      yaxis_title=\"Agent A\",\n",
        "                      xaxis_side=\"top\", height=800, width=800,\n",
        "                      title_y=0.07, title_x=0.5,\n",
        "                      font=dict(size=10))\n",
        "    fig.update_traces(hovertemplate=\n",
        "                      \"Agent A: %{y}<br>Agent B: %{x}<br>Count: %{z}<extra></extra>\")\n",
        "    return fig\n",
        "\n",
        "# Generate heatmap for Tie ratings\n",
        "fig = visualize_battle_count(ratings[ratings['Rating'] == 'Tie'],\n",
        "                             title=\"Tie Count for Each Combination of Agents\", show_num_models=30)\n",
        "fig.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 128,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 817
        },
        "id": "7ddLxQP-YGzS",
        "outputId": "5c7c82a4-e814-4dfd-8333-71f33a0bab91"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.plotly.v1+json": {
              "config": {
                "plotlyServerURL": "https://plot.ly"
              },
              "data": [
                {
                  "coloraxis": "coloraxis",
                  "hovertemplate": "Agent A: %{y}<br>Agent B: %{x}<br>Count: %{z}<extra></extra>",
                  "name": "0",
                  "texttemplate": "%{z}",
                  "type": "heatmap",
                  "x": [
                    "langchain google-serper search agent (gpt-4o-2024-08-06)",
                    "langchain brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (gpt-4o-2024-08-06)",
                    "langchain google-serper search agent (gpt-4-0613)",
                    "langchain Wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain Wikipedia (gpt-4-0613)",
                    "langchain Wikipedia (open-mixtral-8x7b)",
                    "langchain google-serper search agent (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (llama-3.1-8B-instruct)",
                    "langchain Wikipedia (gpt-4o-mini-2024-07-18)",
                    "langchain google-serper search agent (mistral-large-2407)",
                    "langchain Wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain google-serper search agent (gpt-4o-2024-05-13)",
                    "langchain Wikipedia (claude-3-opus-20240229)",
                    "langchain google-serper search agent (open-mixtral-8x7b)",
                    "langchain google-serper search agent (claude-3-opus-20240229)",
                    "langchain Wikipedia (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (llama-3.1-8B-instruct)",
                    "llamaindex brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (open-mixtral-8x22b)",
                    "langchain google-serper search agent (gpt-4o-mini-2024-07-18)",
                    "langchain google-serper search agent (gemini-1.5-pro-001)",
                    "langchain Wikipedia (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (open-mixtral-8x22b)",
                    "langchain Wikipedia (mistral-large-2407)",
                    "langchain Wikipedia (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (llama-3.1-405B-instruct)"
                  ],
                  "xaxis": "x",
                  "y": [
                    "langchain google-serper search agent (gpt-4o-2024-08-06)",
                    "langchain brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (gpt-4o-2024-08-06)",
                    "langchain google-serper search agent (gpt-4-0613)",
                    "langchain Wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain Wikipedia (gpt-4-0613)",
                    "langchain Wikipedia (open-mixtral-8x7b)",
                    "langchain google-serper search agent (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (llama-3.1-8B-instruct)",
                    "langchain Wikipedia (gpt-4o-mini-2024-07-18)",
                    "langchain google-serper search agent (mistral-large-2407)",
                    "langchain Wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain google-serper search agent (gpt-4o-2024-05-13)",
                    "langchain Wikipedia (claude-3-opus-20240229)",
                    "langchain google-serper search agent (open-mixtral-8x7b)",
                    "langchain google-serper search agent (claude-3-opus-20240229)",
                    "langchain Wikipedia (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (llama-3.1-8B-instruct)",
                    "llamaindex brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (open-mixtral-8x22b)",
                    "langchain google-serper search agent (gpt-4o-mini-2024-07-18)",
                    "langchain google-serper search agent (gemini-1.5-pro-001)",
                    "langchain Wikipedia (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (open-mixtral-8x22b)",
                    "langchain Wikipedia (mistral-large-2407)",
                    "langchain Wikipedia (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (llama-3.1-405B-instruct)"
                  ],
                  "yaxis": "y",
                  "z": [
                    [
                      0,
                      77,
                      1,
                      0,
                      3,
                      3,
                      3,
                      0,
                      4,
                      1,
                      0,
                      1,
                      0,
                      5,
                      0,
                      1,
                      4,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      6,
                      4,
                      0
                    ],
                    [
                      77,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      2,
                      0,
                      1,
                      1,
                      0,
                      2,
                      0,
                      3,
                      2,
                      0,
                      2,
                      0,
                      17,
                      0,
                      2,
                      0,
                      0,
                      4,
                      0,
                      3,
                      0,
                      0,
                      1
                    ],
                    [
                      0,
                      0,
                      2,
                      0,
                      6,
                      5,
                      4,
                      0,
                      4,
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0
                    ],
                    [
                      3,
                      0,
                      0,
                      6,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      3,
                      0,
                      2,
                      0,
                      2,
                      1,
                      0,
                      3,
                      3,
                      0,
                      0,
                      3,
                      3,
                      0,
                      4,
                      3,
                      1,
                      0,
                      0,
                      3
                    ],
                    [
                      3,
                      0,
                      0,
                      5,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      3,
                      0,
                      2,
                      0,
                      3,
                      2,
                      0,
                      1,
                      1,
                      0,
                      0,
                      1,
                      2,
                      0,
                      3,
                      2,
                      1,
                      0,
                      0,
                      4
                    ],
                    [
                      3,
                      0,
                      1,
                      4,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      1,
                      0,
                      5,
                      5,
                      0,
                      4,
                      5,
                      0,
                      0,
                      2,
                      2,
                      0,
                      5,
                      4,
                      3,
                      0,
                      0,
                      3
                    ],
                    [
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      1,
                      0,
                      6,
                      5,
                      0,
                      0,
                      0,
                      5,
                      0,
                      0,
                      4,
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      4,
                      0,
                      0,
                      0,
                      0,
                      3,
                      0
                    ],
                    [
                      4,
                      0,
                      0,
                      4,
                      0,
                      0,
                      0,
                      6,
                      0,
                      0,
                      5,
                      0,
                      1,
                      0,
                      2,
                      3,
                      0,
                      3,
                      1,
                      0,
                      0,
                      2,
                      2,
                      0,
                      3,
                      2,
                      1,
                      0,
                      0,
                      1
                    ],
                    [
                      1,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      5,
                      0,
                      0,
                      3,
                      0,
                      2,
                      0,
                      4,
                      1,
                      0,
                      2,
                      4,
                      0,
                      0,
                      1,
                      1,
                      0,
                      2,
                      1,
                      6,
                      0,
                      0,
                      4
                    ],
                    [
                      0,
                      0,
                      1,
                      0,
                      3,
                      3,
                      2,
                      0,
                      5,
                      3,
                      0,
                      3,
                      0,
                      3,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      3,
                      0,
                      4,
                      0,
                      2,
                      1,
                      0,
                      4,
                      5,
                      0,
                      0,
                      2,
                      4,
                      0,
                      0,
                      2,
                      2,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      2,
                      0,
                      2,
                      2,
                      1,
                      0,
                      1,
                      2,
                      0,
                      4,
                      0,
                      2,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      4,
                      0,
                      0,
                      5,
                      0,
                      0,
                      0,
                      1,
                      4,
                      0
                    ],
                    [
                      5,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      5,
                      0,
                      0,
                      3,
                      0,
                      2,
                      0,
                      4,
                      4,
                      0,
                      1,
                      3,
                      0,
                      0,
                      2,
                      2,
                      0,
                      2,
                      2,
                      0,
                      0,
                      0,
                      1
                    ],
                    [
                      0,
                      2,
                      3,
                      0,
                      2,
                      3,
                      5,
                      0,
                      2,
                      4,
                      0,
                      2,
                      0,
                      4,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      5,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      2,
                      0,
                      1,
                      2,
                      5,
                      0,
                      3,
                      1,
                      0,
                      1,
                      0,
                      4,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      4,
                      1,
                      0
                    ],
                    [
                      4,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      4,
                      0,
                      0,
                      2,
                      0,
                      3,
                      0,
                      3,
                      0,
                      0,
                      1,
                      2,
                      0,
                      0,
                      4,
                      3,
                      0,
                      2,
                      2,
                      2,
                      0,
                      0,
                      1
                    ],
                    [
                      0,
                      0,
                      2,
                      0,
                      3,
                      1,
                      4,
                      0,
                      3,
                      2,
                      0,
                      4,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      3,
                      4,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      3,
                      1,
                      5,
                      0,
                      1,
                      4,
                      0,
                      5,
                      0,
                      3,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0
                    ],
                    [
                      2,
                      0,
                      17,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      3,
                      0,
                      4,
                      0,
                      5,
                      0,
                      0,
                      3,
                      3,
                      0,
                      0,
                      3,
                      2,
                      0,
                      2,
                      2,
                      1,
                      0,
                      0,
                      1
                    ],
                    [
                      0,
                      0,
                      2,
                      0,
                      3,
                      1,
                      2,
                      0,
                      2,
                      1,
                      0,
                      2,
                      0,
                      2,
                      0,
                      0,
                      4,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      2,
                      1,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      3,
                      2,
                      2,
                      0,
                      2,
                      1,
                      0,
                      4,
                      0,
                      2,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0
                    ],
                    [
                      2,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      4,
                      0,
                      0,
                      0,
                      0,
                      5,
                      0,
                      3,
                      1,
                      0,
                      1,
                      3,
                      0,
                      0,
                      1,
                      3,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1
                    ],
                    [
                      0,
                      0,
                      4,
                      0,
                      4,
                      3,
                      5,
                      0,
                      3,
                      2,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      3,
                      2,
                      4,
                      0,
                      2,
                      1,
                      0,
                      2,
                      0,
                      2,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0
                    ],
                    [
                      0,
                      0,
                      3,
                      0,
                      1,
                      1,
                      3,
                      0,
                      1,
                      6,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0
                    ],
                    [
                      6,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      4,
                      0,
                      3,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      2
                    ],
                    [
                      4,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      2,
                      0,
                      4,
                      0,
                      0,
                      1,
                      0,
                      4,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0,
                      2,
                      1,
                      2,
                      0,
                      0,
                      2
                    ],
                    [
                      0,
                      0,
                      1,
                      0,
                      3,
                      4,
                      3,
                      0,
                      1,
                      4,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      2,
                      2,
                      0
                    ]
                  ]
                }
              ],
              "layout": {
                "coloraxis": {
                  "colorscale": [
                    [
                      0,
                      "#0d0887"
                    ],
                    [
                      0.1111111111111111,
                      "#46039f"
                    ],
                    [
                      0.2222222222222222,
                      "#7201a8"
                    ],
                    [
                      0.3333333333333333,
                      "#9c179e"
                    ],
                    [
                      0.4444444444444444,
                      "#bd3786"
                    ],
                    [
                      0.5555555555555556,
                      "#d8576b"
                    ],
                    [
                      0.6666666666666666,
                      "#ed7953"
                    ],
                    [
                      0.7777777777777778,
                      "#fb9f3a"
                    ],
                    [
                      0.8888888888888888,
                      "#fdca26"
                    ],
                    [
                      1,
                      "#f0f921"
                    ]
                  ]
                },
                "font": {
                  "size": 10
                },
                "height": 800,
                "template": {
                  "data": {
                    "bar": [
                      {
                        "error_x": {
                          "color": "#2a3f5f"
                        },
                        "error_y": {
                          "color": "#2a3f5f"
                        },
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "bar"
                      }
                    ],
                    "barpolar": [
                      {
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "barpolar"
                      }
                    ],
                    "carpet": [
                      {
                        "aaxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "baxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "type": "carpet"
                      }
                    ],
                    "choropleth": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "choropleth"
                      }
                    ],
                    "contour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "contour"
                      }
                    ],
                    "contourcarpet": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "contourcarpet"
                      }
                    ],
                    "heatmap": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmap"
                      }
                    ],
                    "heatmapgl": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmapgl"
                      }
                    ],
                    "histogram": [
                      {
                        "marker": {
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "histogram"
                      }
                    ],
                    "histogram2d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2d"
                      }
                    ],
                    "histogram2dcontour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2dcontour"
                      }
                    ],
                    "mesh3d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "mesh3d"
                      }
                    ],
                    "parcoords": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "parcoords"
                      }
                    ],
                    "pie": [
                      {
                        "automargin": true,
                        "type": "pie"
                      }
                    ],
                    "scatter": [
                      {
                        "fillpattern": {
                          "fillmode": "overlay",
                          "size": 10,
                          "solidity": 0.2
                        },
                        "type": "scatter"
                      }
                    ],
                    "scatter3d": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatter3d"
                      }
                    ],
                    "scattercarpet": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattercarpet"
                      }
                    ],
                    "scattergeo": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergeo"
                      }
                    ],
                    "scattergl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergl"
                      }
                    ],
                    "scattermapbox": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattermapbox"
                      }
                    ],
                    "scatterpolar": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolar"
                      }
                    ],
                    "scatterpolargl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolargl"
                      }
                    ],
                    "scatterternary": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterternary"
                      }
                    ],
                    "surface": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "surface"
                      }
                    ],
                    "table": [
                      {
                        "cells": {
                          "fill": {
                            "color": "#EBF0F8"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "header": {
                          "fill": {
                            "color": "#C8D4E3"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "type": "table"
                      }
                    ]
                  },
                  "layout": {
                    "annotationdefaults": {
                      "arrowcolor": "#2a3f5f",
                      "arrowhead": 0,
                      "arrowwidth": 1
                    },
                    "autotypenumbers": "strict",
                    "coloraxis": {
                      "colorbar": {
                        "outlinewidth": 0,
                        "ticks": ""
                      }
                    },
                    "colorscale": {
                      "diverging": [
                        [
                          0,
                          "#8e0152"
                        ],
                        [
                          0.1,
                          "#c51b7d"
                        ],
                        [
                          0.2,
                          "#de77ae"
                        ],
                        [
                          0.3,
                          "#f1b6da"
                        ],
                        [
                          0.4,
                          "#fde0ef"
                        ],
                        [
                          0.5,
                          "#f7f7f7"
                        ],
                        [
                          0.6,
                          "#e6f5d0"
                        ],
                        [
                          0.7,
                          "#b8e186"
                        ],
                        [
                          0.8,
                          "#7fbc41"
                        ],
                        [
                          0.9,
                          "#4d9221"
                        ],
                        [
                          1,
                          "#276419"
                        ]
                      ],
                      "sequential": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ],
                      "sequentialminus": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ]
                    },
                    "colorway": [
                      "#636efa",
                      "#EF553B",
                      "#00cc96",
                      "#ab63fa",
                      "#FFA15A",
                      "#19d3f3",
                      "#FF6692",
                      "#B6E880",
                      "#FF97FF",
                      "#FECB52"
                    ],
                    "font": {
                      "color": "#2a3f5f"
                    },
                    "geo": {
                      "bgcolor": "white",
                      "lakecolor": "white",
                      "landcolor": "#E5ECF6",
                      "showlakes": true,
                      "showland": true,
                      "subunitcolor": "white"
                    },
                    "hoverlabel": {
                      "align": "left"
                    },
                    "hovermode": "closest",
                    "mapbox": {
                      "style": "light"
                    },
                    "paper_bgcolor": "white",
                    "plot_bgcolor": "#E5ECF6",
                    "polar": {
                      "angularaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "radialaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "scene": {
                      "xaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "yaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "zaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      }
                    },
                    "shapedefaults": {
                      "line": {
                        "color": "#2a3f5f"
                      }
                    },
                    "ternary": {
                      "aaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "baxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "caxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "title": {
                      "x": 0.05
                    },
                    "xaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    },
                    "yaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    }
                  }
                },
                "title": {
                  "text": "Battle Count for Each Combination of Models (without Ties)",
                  "x": 0.5,
                  "y": 0.07
                },
                "width": 800,
                "xaxis": {
                  "anchor": "y",
                  "constrain": "domain",
                  "domain": [
                    0,
                    1
                  ],
                  "scaleanchor": "y",
                  "side": "top",
                  "title": {
                    "text": "Agent B"
                  }
                },
                "yaxis": {
                  "anchor": "x",
                  "autorange": "reversed",
                  "constrain": "domain",
                  "domain": [
                    0,
                    1
                  ],
                  "title": {
                    "text": "Agent A"
                  }
                }
              }
            }
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "visualize_battle_count(ratings_no_tie, \"Battle Count for Each Combination of Models (without Ties)\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 129,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 817
        },
        "id": "1mae0ZVeY3Qz",
        "outputId": "6b4a51e8-72bd-4cdb-ed90-4c4460056d0c"
      },
      "outputs": [
        {
          "data": {
            "application/vnd.plotly.v1+json": {
              "config": {
                "plotlyServerURL": "https://plot.ly"
              },
              "data": [
                {
                  "coloraxis": "coloraxis",
                  "hovertemplate": "Agent A: %{y}<br>Agent B: %{x}<br>Count: %{z}<extra></extra>",
                  "name": "0",
                  "texttemplate": "%{z}",
                  "type": "heatmap",
                  "x": [
                    "langchain Wikipedia (gpt-4o-2024-08-06)",
                    "langchain google-serper search agent (gpt-4o-2024-08-06)",
                    "llamaindex brave-search agent (gpt-4o-2024-08-06)",
                    "openai assistant function calling (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (llama-3.1-70B-instruct)",
                    "langchain brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (gpt-4o-mini-2024-07-18)",
                    "langchain Wikipedia (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (open-mixtral-8x22b)",
                    "langchain Wikipedia (open-mixtral-8x7b)",
                    "langchain Wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (gemini-1.5-pro-001)",
                    "langchain Wikipedia (gemini-1.5-pro-001)",
                    "langchain google-serper search agent (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (claude-3-haiku-20240307)",
                    "langchain google-serper search agent (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (llama-3.1-405B-instruct)",
                    "langchain YouTube Search (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (claude-3-opus-20240229)",
                    "langchain google-serper search agent (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (llama-3.1-405B-instruct)",
                    "langchain google-serper search agent (mistral-large-2407)",
                    "langchain Wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (gpt-4-0613)",
                    "langchain google-serper search agent (gpt-4-0613)",
                    "langchain google-serper search agent (claude-3-opus-20240229)",
                    "langchain google-serper search agent (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (open-mixtral-8x7b)",
                    "langchain alpha-vantage stock agent (gpt-4-turbo-2024-04-09)"
                  ],
                  "xaxis": "x",
                  "y": [
                    "langchain Wikipedia (gpt-4o-2024-08-06)",
                    "langchain google-serper search agent (gpt-4o-2024-08-06)",
                    "llamaindex brave-search agent (gpt-4o-2024-08-06)",
                    "openai assistant function calling (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (llama-3.1-70B-instruct)",
                    "langchain brave-search agent (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (gpt-4o-mini-2024-07-18)",
                    "langchain Wikipedia (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (open-mixtral-8x22b)",
                    "langchain Wikipedia (open-mixtral-8x7b)",
                    "langchain Wikipedia (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (gemini-1.5-pro-001)",
                    "langchain Wikipedia (gemini-1.5-pro-001)",
                    "langchain google-serper search agent (gpt-4o-2024-05-13)",
                    "langchain google-serper search agent (claude-3-5-sonnet-20240620)",
                    "langchain google-serper search agent (claude-3-haiku-20240307)",
                    "langchain google-serper search agent (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (llama-3.1-405B-instruct)",
                    "langchain YouTube Search (gpt-4o-2024-08-06)",
                    "langchain Wikipedia (claude-3-opus-20240229)",
                    "langchain google-serper search agent (gemini-1.5-flash-001)",
                    "langchain google-serper search agent (llama-3.1-405B-instruct)",
                    "langchain google-serper search agent (mistral-large-2407)",
                    "langchain Wikipedia (gpt-4-turbo-2024-04-09)",
                    "langchain Wikipedia (gpt-4-0613)",
                    "langchain google-serper search agent (gpt-4-0613)",
                    "langchain google-serper search agent (claude-3-opus-20240229)",
                    "langchain google-serper search agent (llama-3.1-70B-instruct)",
                    "langchain google-serper search agent (open-mixtral-8x7b)",
                    "langchain alpha-vantage stock agent (gpt-4-turbo-2024-04-09)"
                  ],
                  "yaxis": "y",
                  "z": [
                    [
                      2,
                      2,
                      6,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0
                    ],
                    [
                      2,
                      0,
                      2,
                      0,
                      0,
                      9,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      6,
                      2,
                      0,
                      2,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      3,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      2,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0
                    ],
                    [
                      0,
                      9,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      1,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      1,
                      0,
                      1,
                      0,
                      2,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0
                    ],
                    [
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      2,
                      2,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0
                    ],
                    [
                      2,
                      0,
                      3,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      2,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      2,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      1,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      1,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      2,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0
                    ],
                    [
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      1,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0,
                      0
                    ]
                  ]
                }
              ],
              "layout": {
                "coloraxis": {
                  "colorscale": [
                    [
                      0,
                      "#0d0887"
                    ],
                    [
                      0.1111111111111111,
                      "#46039f"
                    ],
                    [
                      0.2222222222222222,
                      "#7201a8"
                    ],
                    [
                      0.3333333333333333,
                      "#9c179e"
                    ],
                    [
                      0.4444444444444444,
                      "#bd3786"
                    ],
                    [
                      0.5555555555555556,
                      "#d8576b"
                    ],
                    [
                      0.6666666666666666,
                      "#ed7953"
                    ],
                    [
                      0.7777777777777778,
                      "#fb9f3a"
                    ],
                    [
                      0.8888888888888888,
                      "#fdca26"
                    ],
                    [
                      1,
                      "#f0f921"
                    ]
                  ]
                },
                "font": {
                  "size": 10
                },
                "height": 800,
                "template": {
                  "data": {
                    "bar": [
                      {
                        "error_x": {
                          "color": "#2a3f5f"
                        },
                        "error_y": {
                          "color": "#2a3f5f"
                        },
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "bar"
                      }
                    ],
                    "barpolar": [
                      {
                        "marker": {
                          "line": {
                            "color": "#E5ECF6",
                            "width": 0.5
                          },
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "barpolar"
                      }
                    ],
                    "carpet": [
                      {
                        "aaxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "baxis": {
                          "endlinecolor": "#2a3f5f",
                          "gridcolor": "white",
                          "linecolor": "white",
                          "minorgridcolor": "white",
                          "startlinecolor": "#2a3f5f"
                        },
                        "type": "carpet"
                      }
                    ],
                    "choropleth": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "choropleth"
                      }
                    ],
                    "contour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "contour"
                      }
                    ],
                    "contourcarpet": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "contourcarpet"
                      }
                    ],
                    "heatmap": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmap"
                      }
                    ],
                    "heatmapgl": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "heatmapgl"
                      }
                    ],
                    "histogram": [
                      {
                        "marker": {
                          "pattern": {
                            "fillmode": "overlay",
                            "size": 10,
                            "solidity": 0.2
                          }
                        },
                        "type": "histogram"
                      }
                    ],
                    "histogram2d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2d"
                      }
                    ],
                    "histogram2dcontour": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "histogram2dcontour"
                      }
                    ],
                    "mesh3d": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "type": "mesh3d"
                      }
                    ],
                    "parcoords": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "parcoords"
                      }
                    ],
                    "pie": [
                      {
                        "automargin": true,
                        "type": "pie"
                      }
                    ],
                    "scatter": [
                      {
                        "fillpattern": {
                          "fillmode": "overlay",
                          "size": 10,
                          "solidity": 0.2
                        },
                        "type": "scatter"
                      }
                    ],
                    "scatter3d": [
                      {
                        "line": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatter3d"
                      }
                    ],
                    "scattercarpet": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattercarpet"
                      }
                    ],
                    "scattergeo": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergeo"
                      }
                    ],
                    "scattergl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattergl"
                      }
                    ],
                    "scattermapbox": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scattermapbox"
                      }
                    ],
                    "scatterpolar": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolar"
                      }
                    ],
                    "scatterpolargl": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterpolargl"
                      }
                    ],
                    "scatterternary": [
                      {
                        "marker": {
                          "colorbar": {
                            "outlinewidth": 0,
                            "ticks": ""
                          }
                        },
                        "type": "scatterternary"
                      }
                    ],
                    "surface": [
                      {
                        "colorbar": {
                          "outlinewidth": 0,
                          "ticks": ""
                        },
                        "colorscale": [
                          [
                            0,
                            "#0d0887"
                          ],
                          [
                            0.1111111111111111,
                            "#46039f"
                          ],
                          [
                            0.2222222222222222,
                            "#7201a8"
                          ],
                          [
                            0.3333333333333333,
                            "#9c179e"
                          ],
                          [
                            0.4444444444444444,
                            "#bd3786"
                          ],
                          [
                            0.5555555555555556,
                            "#d8576b"
                          ],
                          [
                            0.6666666666666666,
                            "#ed7953"
                          ],
                          [
                            0.7777777777777778,
                            "#fb9f3a"
                          ],
                          [
                            0.8888888888888888,
                            "#fdca26"
                          ],
                          [
                            1,
                            "#f0f921"
                          ]
                        ],
                        "type": "surface"
                      }
                    ],
                    "table": [
                      {
                        "cells": {
                          "fill": {
                            "color": "#EBF0F8"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "header": {
                          "fill": {
                            "color": "#C8D4E3"
                          },
                          "line": {
                            "color": "white"
                          }
                        },
                        "type": "table"
                      }
                    ]
                  },
                  "layout": {
                    "annotationdefaults": {
                      "arrowcolor": "#2a3f5f",
                      "arrowhead": 0,
                      "arrowwidth": 1
                    },
                    "autotypenumbers": "strict",
                    "coloraxis": {
                      "colorbar": {
                        "outlinewidth": 0,
                        "ticks": ""
                      }
                    },
                    "colorscale": {
                      "diverging": [
                        [
                          0,
                          "#8e0152"
                        ],
                        [
                          0.1,
                          "#c51b7d"
                        ],
                        [
                          0.2,
                          "#de77ae"
                        ],
                        [
                          0.3,
                          "#f1b6da"
                        ],
                        [
                          0.4,
                          "#fde0ef"
                        ],
                        [
                          0.5,
                          "#f7f7f7"
                        ],
                        [
                          0.6,
                          "#e6f5d0"
                        ],
                        [
                          0.7,
                          "#b8e186"
                        ],
                        [
                          0.8,
                          "#7fbc41"
                        ],
                        [
                          0.9,
                          "#4d9221"
                        ],
                        [
                          1,
                          "#276419"
                        ]
                      ],
                      "sequential": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ],
                      "sequentialminus": [
                        [
                          0,
                          "#0d0887"
                        ],
                        [
                          0.1111111111111111,
                          "#46039f"
                        ],
                        [
                          0.2222222222222222,
                          "#7201a8"
                        ],
                        [
                          0.3333333333333333,
                          "#9c179e"
                        ],
                        [
                          0.4444444444444444,
                          "#bd3786"
                        ],
                        [
                          0.5555555555555556,
                          "#d8576b"
                        ],
                        [
                          0.6666666666666666,
                          "#ed7953"
                        ],
                        [
                          0.7777777777777778,
                          "#fb9f3a"
                        ],
                        [
                          0.8888888888888888,
                          "#fdca26"
                        ],
                        [
                          1,
                          "#f0f921"
                        ]
                      ]
                    },
                    "colorway": [
                      "#636efa",
                      "#EF553B",
                      "#00cc96",
                      "#ab63fa",
                      "#FFA15A",
                      "#19d3f3",
                      "#FF6692",
                      "#B6E880",
                      "#FF97FF",
                      "#FECB52"
                    ],
                    "font": {
                      "color": "#2a3f5f"
                    },
                    "geo": {
                      "bgcolor": "white",
                      "lakecolor": "white",
                      "landcolor": "#E5ECF6",
                      "showlakes": true,
                      "showland": true,
                      "subunitcolor": "white"
                    },
                    "hoverlabel": {
                      "align": "left"
                    },
                    "hovermode": "closest",
                    "mapbox": {
                      "style": "light"
                    },
                    "paper_bgcolor": "white",
                    "plot_bgcolor": "#E5ECF6",
                    "polar": {
                      "angularaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "radialaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "scene": {
                      "xaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "yaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      },
                      "zaxis": {
                        "backgroundcolor": "#E5ECF6",
                        "gridcolor": "white",
                        "gridwidth": 2,
                        "linecolor": "white",
                        "showbackground": true,
                        "ticks": "",
                        "zerolinecolor": "white"
                      }
                    },
                    "shapedefaults": {
                      "line": {
                        "color": "#2a3f5f"
                      }
                    },
                    "ternary": {
                      "aaxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "baxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      },
                      "bgcolor": "#E5ECF6",
                      "caxis": {
                        "gridcolor": "white",
                        "linecolor": "white",
                        "ticks": ""
                      }
                    },
                    "title": {
                      "x": 0.05
                    },
                    "xaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    },
                    "yaxis": {
                      "automargin": true,
                      "gridcolor": "white",
                      "linecolor": "white",
                      "ticks": "",
                      "title": {
                        "standoff": 15
                      },
                      "zerolinecolor": "white",
                      "zerolinewidth": 2
                    }
                  }
                },
                "title": {
                  "text": "Tie Count for Each Combination of Models",
                  "x": 0.5,
                  "y": 0.07
                },
                "width": 800,
                "xaxis": {
                  "anchor": "y",
                  "constrain": "domain",
                  "domain": [
                    0,
                    1
                  ],
                  "scaleanchor": "y",
                  "side": "top",
                  "title": {
                    "text": "Agent B"
                  }
                },
                "yaxis": {
                  "anchor": "x",
                  "autorange": "reversed",
                  "constrain": "domain",
                  "domain": [
                    0,
                    1
                  ],
                  "title": {
                    "text": "Agent A"
                  }
                }
              }
            }
          },
          "metadata": {},
          "output_type": "display_data"
        }
      ],
      "source": [
        "\n",
        "\n",
        "visualize_battle_count(ratings[ratings['Rating'].str.contains(\"Tie\")], \"Tie Count for Each Combination of Models\")\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "PWuJDvvPavSA"
      },
      "source": [
        "# Preliminary Ranking - Basic Elo"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 130,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4knQGsFZcbqI",
        "outputId": "e4b7a9ed-1fc1-4f77-d2d2-ef376e01e732"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "                                                   Elo Rating\n",
            "llamaindex brave-search agent (gpt-4o-2024-08-06)     1048.85\n",
            "langchain Wikipedia (claude-3-5-sonnet-20240620)      1038.12\n",
            "langchain Wikipedia (open-mixtral-8x22b)              1037.76\n",
            "langchain Wikipedia (llama-3.1-70B-instruct)          1034.49\n",
            "langchain Wikipedia (open-mixtral-8x7b)               1033.36\n",
            "...                                                       ...\n",
            "langchain Wolfram Alpha (gemini-1.5-flash-001)         980.81\n",
            "langchain Wikipedia (llama-3.1-8B-instruct)            977.79\n",
            "llamaindex wikipedia (llama-3.1-405B-instruct)         977.21\n",
            "langchain Wikipedia (gemini-1.5-flash-001)             977.03\n",
            "openai general assistant (gpt-4o-2024-08-06)           947.47\n",
            "\n",
            "[447 rows x 1 columns]\n"
          ]
        }
      ],
      "source": [
        "from collections import defaultdict\n",
        "\n",
        "def compute_online_elo(battles, K=4, SCALE=400, BASE=10, INIT_RATING=1000):\n",
        "    rating = defaultdict(lambda: INIT_RATING)\n",
        "\n",
        "    for _, battle in battles.iterrows():\n",
        "        model_a = battle['Agent_A']['Agent name']\n",
        "        model_b = battle['Agent_B']['Agent name']\n",
        "        winner = battle['Rating']\n",
        "\n",
        "        ra = rating[model_a]\n",
        "        rb = rating[model_b]\n",
        "        ea = 1 / (1 + BASE ** ((rb - ra) / SCALE))\n",
        "        eb = 1 / (1 + BASE ** ((ra - rb) / SCALE))\n",
        "\n",
        "        if winner == \"A is better\":\n",
        "            sa = 1\n",
        "        elif winner == \"B is better\":\n",
        "            sa = 0\n",
        "        elif winner == \"Tie\" or winner == \"Both are bad\":\n",
        "            sa = 0.5\n",
        "        else:\n",
        "            raise Exception(f\"unexpected rating {winner}\")\n",
        "\n",
        "        rating[model_a] += K * (sa - ea)\n",
        "        rating[model_b] += K * (1 - sa - eb)\n",
        "\n",
        "    return rating\n",
        "\n",
        "# Example usage:\n",
        "elo_ratings = compute_online_elo(ratings)\n",
        "\n",
        "# Convert to DataFrame for easier viewing\n",
        "elo_df = pd.DataFrame.from_dict(elo_ratings, orient='index', columns=['Elo Rating'])\n",
        "elo_df = elo_df.sort_values('Elo Rating', ascending=False)\n",
        "print(elo_df)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 131,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 461
        },
        "id": "JY7EIaOZeWmX",
        "outputId": "026de82f-58a9-4b85-c3ea-dabc2c933d44"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "defaultdict(<function compute_online_elo.<locals>.<lambda> at 0x29b5ea680>, {'langchain brave-search agent (gpt-4o-2024-08-06)': 1025.764576702692, 'langchain google-serper search agent (gpt-4o-2024-08-06)': 1001.1681895087257, 'sql agent plotter langchain (gpt-4o-2024-08-06)': 994.3393786867174, 'langchain ArXiv Article Fetcher (gpt-4o-2024-08-06)': 1000.3575169468093, 'langchain alpha-vantage stock agent (gpt-4o-2024-08-06)': 1022.1255410672924, 'langchain alpha-vantage stock agent (claude-3-5-sonnet-20240620)': 987.2784843320769, 'langchain alpha-vantage stock agent (gpt-4-0613)': 998.0, 'langchain alpha-vantage stock agent (claude-3-opus-20240229)': 1003.8135434468632, 'langchain Google Jobs (gpt-4o-2024-08-06)': 1001.9772402495474, 'langchain Google Jobs (gpt-4-turbo-2024-04-09)': 998.0227597504526, 'langchain google-serper search agent (claude-3-5-sonnet-20240620)': 1024.7515435947003, 'llamaindex Yelp Tool (claude-3-opus-20240229)': 998.0, 'langchain brave-search agent (claude-3-opus-20240229)': 1005.8651333137688, 'langchain google-serper search agent (claude-3-opus-20240229)': 1027.3989596099555, 'langchain ArXiv Article Fetcher (claude-3-haiku-20240307)': 1003.8124136111827, 'llamaindex ArXiv Article Fetcher (claude-3-5-sonnet-20240620)': 994.2010137772048, 'langchain Google Jobs (gpt-4o-2024-05-13)': 1000.0, 'langchain Wikipedia (gpt-4-turbo-2024-04-09)': 1021.0587594087597, 'langchain alpha-vantage stock agent (gpt-4-turbo-2024-04-09)': 981.6263398354808, 'llamaindex Yahoo Finance News (gpt-4-turbo-2024-04-09)': 1001.908587453443, 'sql agent plotter langchain (gpt-4o-2024-05-13)': 997.8581617974004, 'sql agent plotter llamaindex (gpt-4o-2024-05-13)': 1004.1749655802439, 'langchain YouTube Search (gpt-4o-2024-08-06)': 998.9073914163674, 'langchain YouTube Search (gpt-4-turbo-2024-04-09)': 994.0356139008563, 'llamaindex brave-search agent (claude-3-opus-20240229)': 997.8634821401606, 'crewai Meeting Preparation Agent Crew (gpt-4o-2024-08-06)': 998.0, 'openai general assistant (gpt-4-turbo-2024-04-09)': 994.1237390667068, 'sql agent plotter llamaindex (gpt-4o-2024-08-06)': 1002.0000538766014, 'langchain Google Lens (gpt-4o-2024-08-06)': 1001.9022214781791, 'langchain Wolfram Alpha (gpt-4o-2024-08-06)': 1005.9667575310839, 'langchain ArXiv Article Fetcher (gpt-4-0613)': 1015.3495841962177, 'langchain Google Lens (claude-3-haiku-20240307)': 997.861143061272, 'anthropic calculator tool (claude-3-opus-20240229)': 1003.4438337026572, 'anthropic customer service agent (claude-3-opus-20240229)': 1003.9427029408577, 'langchain Wolfram Alpha (claude-3-5-sonnet-20240620)': 1002.2015855430202, 'llamaindex Yahoo Finance News (gpt-4o-2024-05-13)': 1000.0718842189933, 'langchain Eden AI Integration (gpt-4o-2024-08-06)': 998.0228930549515, 'langchain Google Lens (gpt-4o-2024-05-13)': 1000.1194110960921, 'langchain brave-search agent (claude-3-haiku-20240307)': 993.8236387828626, 'langchain google-serper search agent (claude-3-haiku-20240307)': 1006.3736691810634, 'langchain brave-search agent (claude-3-5-sonnet-20240620)': 1003.6466524455419, 'crewai AI Crew for Trip Planning (claude-3-haiku-20240307)': 996.0533837003862, 'openai general assistant (gpt-4-0613)': 996.0681540303542, 'langchain Exa Search Integration (gpt-4-0613)': 998.0, 'langchain Exa Search Integration (gpt-4-turbo-2024-04-09)': 1002.0, 'langchain Google Lens (gpt-4-0613)': 995.9999406392069, 'langchain YouTube Search (claude-3-5-sonnet-20240620)': 999.9856376553657, 'langchain google-serper search agent (gpt-4-turbo-2024-04-09)': 1025.0400527839377, 'sql agent plotter llamaindex (gpt-4-turbo-2024-04-09)': 997.9894012219461, 'langchain ArXiv Article Fetcher (gpt-4o-2024-05-13)': 1017.4977766095955, 'llamaindex ArXiv Article Fetcher (gpt-4o-2024-05-13)': 1003.9995737680108, 'sql agent plotter langchain (claude-3-opus-20240229)': 998.0117074922215, 'openai assistant code interpreter (gpt-4-turbo-2024-04-09)': 1003.8972484627045, 'langchain Python REPL (gpt-4o-2024-08-06)': 994.2196094066289, 'langchain Yahoo Finance News (gpt-4o-2024-05-13)': 1000.096230619394, 'sql agent plotter langchain (gpt-4-turbo-2024-04-09)': 994.0698780068409, 'sql agent plotter langchain (claude-3-haiku-20240307)': 999.977237624856, 'sql agent plotter llamaindex (gpt-4-0613)': 1001.9430948913121, 'langchain google-serper search agent (gpt-4o-2024-05-13)': 1020.3785302426877, 'langchain Wikipedia (claude-3-opus-20240229)': 1002.2016362631082, 'langchain google-serper search agent (gemini-1.5-flash-001)': 987.0098720470634, 'langchain Dall-E Image Generator (gpt-4o-2024-08-06)': 999.8031416577304, 'langchain Google Lens (llama-3.1-405B-instruct)': 998.0, 'langchain Gmail Toolkit (mistral-large-2407)': 1000.0, 'langchain Wolfram Alpha (open-mixtral-8x7b)': 1004.3457216907696, 'langchain Wikipedia (llama-3.1-70B-instruct)': 1034.4910105396423, 'langchain ArXiv Article Fetcher (mistral-large-2407)': 990.3410070315038, 'langchain google-serper search agent (gemini-1.5-pro-001)': 1006.263167315601, 'langchain Google Jobs (gpt-4-0613)': 999.9771069450485, 'langchain brave-search agent (gpt-4o-2024-05-13)': 996.2302927268856, 'langchain google-serper search agent (gpt-4-0613)': 996.9706687128206, 'langchain alpha-vantage stock agent (gemini-1.5-flash-001)': 1020.8812882307113, 'langchain Wikipedia (gemini-1.5-pro-001)': 1001.6782156554169, 'langchain Wolfram Alpha (llama-3.1-405B-instruct)': 1006.030223043594, 'llamaindex wikipedia (gpt-4o-2024-08-06)': 996.4131974178677, 'llamaindex brave-search agent (gpt-4o-2024-08-06)': 1048.8507622085103, 'langchain Dall-E Image Generator (gpt-4o-2024-05-13)': 1004.0780206279038, 'langchain google-serper search agent (mistral-large-2407)': 1014.3300792396029, 'openai assistant code interpreter (gpt-4o-2024-05-13)': 1001.8861934692831, 'langchain Pandas DataFrame (gpt-4o-2024-08-06)': 1005.5416746249869, 'langchain brave-search agent (open-mixtral-8x7b)': 1004.0789189030497, 'langchain Python REPL (gpt-4o-2024-05-13)': 1005.9540181116879, 'langchain Wikipedia (open-mixtral-8x22b)': 1037.7645873809042, 'langchain Google Lens (gpt-4-turbo-2024-04-09)': 996.1446121436667, 'langchain Python REPL (claude-3-5-sonnet-20240620)': 998.0684602852072, 'langchain brave-search agent (llama-3.1-8B-instruct)': 995.796722735829, 'langchain PubMed Biomedical Literature Tool (gpt-4o-2024-05-13)': 1000.0935902686899, 'langchain YouTube Search (claude-3-opus-20240229)': 998.1310574724138, 'llamaindex Exa Search Integration (gpt-4o-2024-08-06)': 1002.0115127982992, 'langchain Wolfram Alpha (claude-3-haiku-20240307)': 992.5491949927589, 'langchain Wikipedia (gemini-1.5-flash-001)': 977.0323067212771, 'langchain Wikipedia (gpt-4o-2024-08-06)': 989.5968253156244, 'langchain brave-search agent (gemini-1.5-pro-001)': 1002.0582999443659, 'langchain google-serper search agent (open-mixtral-8x7b)': 1027.7896114504395, 'openai assistant customer support chatbot (gpt-4o-2024-05-13)': 998.0228923095134, 'langchain Riza Code Interpreter (gpt-4o-2024-05-13)': 1000.0228923095134, 'langchain Riza Code Interpreter (claude-3-opus-20240229)': 1000.0, 'anthropic calculator tool (claude-3-5-sonnet-20240620)': 1002.7439686813711, 'langchain Dall-E Image Generator (gpt-4-0613)': 999.9695358139983, 'langchain OpenWeatherMap (gpt-4o-2024-08-06)': 1006.0777232954011, 'langchain Dall-E Image Generator (claude-3-5-sonnet-20240620)': 1002.0, 'langchain Eden AI Integration (open-mixtral-8x22b)': 998.0, 'langchain OpenWeatherMap (gemini-1.5-pro-001)': 998.0115127982992, 'langchain Dall-E Image Generator (claude-3-opus-20240229)': 1002.0, 'langchain Wolfram Alpha (llama-3.1-70B-instruct)': 994.2894462572334, 'langchain Python REPL (gpt-4-0613)': 1002.030902524033, 'langchain Dall-E Image Generator (gpt-4-turbo-2024-04-09)': 1000.0, 'langchain Riza Code Interpreter (open-mixtral-8x22b)': 1000.0, 'langchain Eden AI Integration (mistral-large-2407)': 1000.0, 'langchain alpha-vantage stock agent (gpt-4o-2024-05-13)': 1002.0717893212923, 'langchain Yahoo Finance News (gpt-4-0613)': 999.9465526658656, 'langchain alpha-vantage stock agent (claude-3-haiku-20240307)': 997.8271634433476, 'langchain Yahoo Finance News (gemini-1.5-pro-001)': 996.0353641612835, 'anthropic pdf upload summarization (claude-3-opus-20240229)': 1003.9999337270057, 'llamaindex OpenAPI Tool (gpt-4o-2024-08-06)': 1000.1510462647509, 'langchain OpenWeatherMap (claude-3-5-sonnet-20240620)': 1002.0113802523107, 'llamaindex OpenWeatherMap (llama-3.1-70B-instruct)': 996.0228938781771, 'llamaindex OpenWeatherMap (mistral-large-2407)': 992.092857496597, 'langchain Yahoo Finance News (claude-3-haiku-20240307)': 994.1669234290338, 'langchain Pandas DataFrame (claude-3-5-sonnet-20240620)': 1003.9985759465154, 'langchain OpenWeatherMap (claude-3-haiku-20240307)': 1000.0, 'llamaindex OpenWeatherMap (open-mixtral-8x7b)': 1000.0, 'langchain alpha-vantage stock agent (mistral-large-2407)': 998.0, 'langchain Yahoo Finance News (llama-3.1-70B-instruct)': 1000.0, 'openai general assistant (gpt-4o-2024-08-06)': 947.4663921255926, 'crewai Meeting Preparation Agent Crew (llama-3.1-405B-instruct)': 999.9322955712144, 'langchain OpenWeatherMap (gemini-1.5-flash-001)': 1000.0002717303207, 'langchain Wolfram Alpha (open-mixtral-8x22b)': 1007.9873555041148, 'langchain OpenWeatherMap (llama-3.1-8B-instruct)': 998.0115127982992, 'llamaindex OpenWeatherMap (open-mixtral-8x22b)': 1001.9884872017008, 'langchain Wolfram Alpha (llama-3.1-8B-instruct)': 992.7596499559995, 'langchain OpenWeatherMap (gpt-4o-2024-05-13)': 1000.0115127982992, 'langchain OpenWeatherMap (llama-3.1-405B-instruct)': 998.022892292048, 'llamaindex OpenWeatherMap (claude-3-opus-20240229)': 999.9769751663554, 'crewai AI Crew for Game Building (gpt-4-turbo-2024-04-09)': 1001.9885530910387, 'langchain alpha-vantage stock agent (llama-3.1-70B-instruct)': 1003.9211081966788, 'langchain Yahoo Finance News (claude-3-opus-20240229)': 993.9958264206238, 'langchain alpha-vantage stock agent (llama-3.1-8B-instruct)': 1000.1487963794192, 'langchain Yahoo Finance News (mistral-large-2407)': 1000.1028487014147, 'langchain Yahoo Finance News (open-mixtral-8x7b)': 1003.8896787161465, 'langchain brave-search agent (gpt-4o-mini-2024-07-18)': 995.9350789311545, 'langchain google-serper search agent (gpt-4o-mini-2024-07-18)': 1010.625283613175, 'langchain ArXiv Article Fetcher (gpt-4o-mini-2024-07-18)': 1002.1046391263626, 'langchain Yahoo Finance News (gpt-4o-2024-08-06)': 1001.6973924019027, 'langchain google-serper search agent (llama-3.1-8B-instruct)': 982.275118730761, 'crewai AI Crew for Trip Planning (open-mixtral-8x22b)': 995.8669354999819, 'langchain google-serper search agent (open-mixtral-8x22b)': 1012.1329549188968, 'langchain Wikipedia (claude-3-5-sonnet-20240620)': 1038.123935914951, 'llamaindex wikipedia (llama-3.1-8B-instruct)': 984.5001951865612, 'langchain alpha-vantage stock agent (llama-3.1-405B-instruct)': 1001.9783467406038, 'langchain Yahoo Finance News (llama-3.1-8B-instruct)': 998.0110795365044, 'langchain alpha-vantage stock agent (open-mixtral-8x22b)': 999.9033299947938, 'langchain Yahoo Finance News (gpt-4-turbo-2024-04-09)': 997.9376402446102, 'langchain alpha-vantage stock agent (open-mixtral-8x7b)': 998.0225209487222, 'langchain alpha-vantage stock agent (gpt-4o-mini-2024-07-18)': 1001.8903561658432, 'langchain AskNews (gpt-4o-2024-08-06)': 999.934775639415, 'langchain Yahoo Finance News (open-mixtral-8x22b)': 995.9117322137424, 'langchain AskNews (llama-3.1-405B-instruct)': 1000.0353449011005, 'llamaindex Yelp Tool (claude-3-haiku-20240307)': 1000.1132923518157, 'llamaindex Yahoo Finance News (llama-3.1-405B-instruct)': 999.9884872017008, 'langchain Wikipedia (gpt-4-0613)': 1019.2161195744317, 'llamaindex wikipedia (llama-3.1-405B-instruct)': 977.2077913223322, 'llamaindex wikipedia (gemini-1.5-flash-001)': 992.3967949708126, 'langchain Wolfram Alpha (gpt-4-0613)': 992.4005278358314, 'langchain Wolfram Alpha (mistral-large-2407)': 1000.0586744804667, 'langchain Wolfram Alpha (claude-3-opus-20240229)': 993.1101103483335, 'langchain Wikipedia (mistral-large-2407)': 999.4060276023663, 'langchain Riza Code Interpreter (open-mixtral-8x7b)': 1000.0115127982992, 'langchain Wolfram Alpha (gemini-1.5-flash-001)': 980.8059531052712, 'sql agent plotter llamaindex (claude-3-opus-20240229)': 999.9998682105952, 'langchain Wikipedia (open-mixtral-8x7b)': 1033.3628634510776, 'llamaindex Yahoo Finance News (llama-3.1-8B-instruct)': 997.9996217192379, 'openai assistant function calling (gpt-4o-2024-08-06)': 1012.6319256769001, 'llamaindex wikipedia (claude-3-5-sonnet-20240620)': 1003.9113357731235, 'llamaindex Yahoo Finance News (claude-3-haiku-20240307)': 1000.0838736427233, 'anthropic sql query (claude-3-haiku-20240307)': 994.1522028591684, 'langchain Python REPL (gemini-1.5-flash-001)': 1003.9993774976833, 'llamaindex Yahoo Finance News (claude-3-5-sonnet-20240620)': 999.999564215813, 'langchain Wikipedia (llama-3.1-8B-instruct)': 977.790623168852, 'langchain NASA Toolkit (gemini-1.5-flash-001)': 997.9999297262262, 'anthropic web page reader (claude-3-5-sonnet-20240620)': 992.3154836816262, 'langchain Dall-E Image Generator (open-mixtral-8x7b)': 998.0, 'langchain Wikipedia (claude-3-haiku-20240307)': 1013.8056124411635, 'langchain brave-search agent (gpt-4-turbo-2024-04-09)': 1005.3855284128553, 'llamaindex wikipedia (claude-3-opus-20240229)': 998.5024192802258, 'langchain You.com Search (open-mixtral-8x22b)': 995.8180467886568, 'langchain NASA Toolkit (gpt-4-turbo-2024-04-09)': 1000.0263591119402, 'langchain Dall-E Image Generator (open-mixtral-8x22b)': 1000.1400372353544, 'langchain Wikipedia (gpt-4o-mini-2024-07-18)': 992.056701417211, 'langchain AskNews (llama-3.1-70B-instruct)': 1003.9927605848824, 'langchain Wikipedia (llama-3.1-405B-instruct)': 1011.9097622755228, 'langchain PubMed Biomedical Literature Tool (llama-3.1-70B-instruct)': 999.9903654716084, 'langchain ArXiv Article Fetcher (gemini-1.5-pro-001)': 999.9359548798334, 'langchain NASA Toolkit (gpt-4-0613)': 994.4910918839718, 'langchain ArXiv Article Fetcher (llama-3.1-70B-instruct)': 1009.9053698680904, 'langchain ArXiv Article Fetcher (gemini-1.5-flash-001)': 1002.2742980365969, 'langchain AskNews (open-mixtral-8x7b)': 1000.0582242382276, 'langchain brave-search agent (llama-3.1-405B-instruct)': 994.3456094452768, 'langchain google-serper search agent (llama-3.1-70B-instruct)': 1028.8797843664556, 'langchain Tavily Search (gemini-1.5-flash-001)': 995.8196331958657, 'llamaindex wikipedia (gpt-4-0613)': 984.6198732000414, 'llamaindex brave-search agent (gemini-1.5-pro-001)': 997.9885534725117, 'langchain brave-search agent (gpt-4-0613)': 986.3580957927414, 'llamaindex brave-search agent (open-mixtral-8x22b)': 997.9671372755109, 'langchain Wikipedia (gpt-4o-2024-05-13)': 1003.5743335266402, 'langchain Google Jobs (llama-3.1-405B-instruct)': 996.0000673748526, 'sql agent plotter langchain (claude-3-5-sonnet-20240620)': 1000.0, 'sql agent plotter llamaindex (llama-3.1-70B-instruct)': 1000.0, 'langchain You.com Search (llama-3.1-70B-instruct)': 1002.0113798926118, 'langchain Python REPL (open-mixtral-8x22b)': 1000.054175472978, 'langchain google-serper search agent (llama-3.1-405B-instruct)': 1023.4130744986948, 'langchain You.com Search (gpt-4-turbo-2024-04-09)': 1000.3627419959956, 'openai assistant function calling (gpt-4o-2024-05-13)': 999.9741411855806, 'langchain PubMed Biomedical Literature Tool (gpt-4o-2024-08-06)': 1003.9598375217249, 'openai general assistant (gpt-4o-2024-05-13)': 992.123881994398, 'openai assistant customer support chatbot (gpt-4-0613)': 999.9886170751375, 'anthropic customer service agent (claude-3-5-sonnet-20240620)': 1000.0, 'langchain Dall-E Image Generator (mistral-large-2407)': 998.0115786984506, 'langchain Python REPL (llama-3.1-70B-instruct)': 1001.9223753651501, 'langchain Golden Query Integration (llama-3.1-70B-instruct)': 1001.8182731348505, 'crewai Meeting Preparation Agent Crew (gemini-1.5-pro-001)': 999.9884860253889, 'langchain Tavily Search (claude-3-5-sonnet-20240620)': 1006.3177575844952, 'langchain Dall-E Image Generator (llama-3.1-8B-instruct)': 998.0127725879694, 'langchain ArXiv Article Fetcher (gpt-4-turbo-2024-04-09)': 1008.1822806596118, 'crewai AI Crew for Game Building (open-mixtral-8x7b)': 998.0007793153444, 'langchain Pandas DataFrame (claude-3-opus-20240229)': 1005.9769758710182, 'sql agent plotter langchain (open-mixtral-8x7b)': 994.0453527584777, 'openai assistant function calling (gpt-4-0613)': 996.1995456645767, 'langchain NASA Toolkit (claude-3-opus-20240229)': 1000.0, 'crewai AI Crew for Game Building (gemini-1.5-flash-001)': 1000.0, 'langchain Eden AI Integration (gemini-1.5-flash-001)': 1000.0227605479525, 'langchain ArXiv Article Fetcher (open-mixtral-8x22b)': 1003.8828276831613, 'anthropic web page reader (claude-3-haiku-20240307)': 985.2228222096327, 'crewai AI Crew for Trip Planning (llama-3.1-70B-instruct)': 996.0440235701915, 'langchain Google Lens (open-mixtral-8x7b)': 1007.6266296895957, 'llamaindex ArXiv Article Fetcher (llama-3.1-70B-instruct)': 994.0333619070835, 'llamaindex wikipedia (mistral-large-2407)': 986.3108781924417, 'crewai AI Crew for Trip Planning (gpt-4-0613)': 997.9887367793491, 'crewai AI Crew for Game Building (mistral-large-2407)': 999.9999341085037, 'langchain PubMed Biomedical Literature Tool (gpt-4o-mini-2024-07-18)': 1004.2181445569306, 'langchain You.com Search (gemini-1.5-flash-001)': 992.0502133164263, 'llamaindex OpenWeatherMap (claude-3-haiku-20240307)': 1001.9771069450824, 'langchain Google Jobs (claude-3-5-sonnet-20240620)': 996.0037143271911, 'openai assistant code interpreter (gpt-4o-2024-08-06)': 996.0805168332272, 'langchain Python REPL (claude-3-haiku-20240307)': 1003.9105886680802, 'langchain brave-search agent (mistral-large-2407)': 1001.5274590931789, 'langchain brave-search agent (open-mixtral-8x22b)': 998.136023498728, 'llamaindex Yahoo Finance News (gemini-1.5-pro-001)': 998.1177072596669, 'langchain PubMed Biomedical Literature Tool (claude-3-haiku-20240307)': 998.2675026749032, 'langchain ArXiv Article Fetcher (claude-3-opus-20240229)': 1002.0757466712448, 'llamaindex Yahoo Finance News (claude-3-opus-20240229)': 997.989165520119, 'langchain NASA Toolkit (claude-3-5-sonnet-20240620)': 992.1379089551343, 'llamaindex ArXiv Article Fetcher (mistral-large-2407)': 996.0799846680225, 'llamaindex ArXiv Article Fetcher (llama-3.1-405B-instruct)': 997.9995461589999, 'llamaindex ArXiv Article Fetcher (gemini-1.5-flash-001)': 997.9999416273231, 'langchain OpenWeatherMap (gpt-4o-mini-2024-07-18)': 1001.9647772184477, 'langchain PubMed Biomedical Literature Tool (gemini-1.5-pro-001)': 996.1407738110314, 'llamaindex OpenWeatherMap (gemini-1.5-pro-001)': 997.9886197476893, 'llamaindex OpenWeatherMap (gpt-4-0613)': 999.9884872017008, 'langchain OpenWeatherMap (open-mixtral-8x22b)': 1002.0, 'llamaindex OpenWeatherMap (llama-3.1-8B-instruct)': 996.0342726605538, 'langchain OpenWeatherMap (open-mixtral-8x7b)': 1002.0, 'llamaindex OpenWeatherMap (claude-3-5-sonnet-20240620)': 998.0236080645575, 'langchain OpenWeatherMap (claude-3-opus-20240229)': 1005.9200811681326, 'langchain AskNews (gemini-1.5-flash-001)': 1001.9510728704379, 'langchain alpha-vantage stock agent (gemini-1.5-pro-001)': 998.0226508971939, 'langchain Yahoo Finance News (claude-3-5-sonnet-20240620)': 996.3437513264963, 'langchain YouTube Search (claude-3-haiku-20240307)': 998.0940835772969, 'crewai AI Crew for Game Building (gpt-4-0613)': 1000.0230248336446, 'llamaindex Yahoo Finance News (gemini-1.5-flash-001)': 997.9998685919176, 'llamaindex ArXiv Article Fetcher (gpt-4o-2024-08-06)': 992.4601470592004, 'langchain ArXiv Article Fetcher (llama-3.1-405B-instruct)': 998.3646233844089, 'llamaindex ArXiv Article Fetcher (open-mixtral-8x22b)': 1005.9425694357893, 'llamaindex ArXiv Article Fetcher (open-mixtral-8x7b)': 999.9879164521043, 'langchain YouTube Search (gemini-1.5-flash-001)': 992.3511107082898, 'llamaindex ArXiv Article Fetcher (gpt-4o-mini-2024-07-18)': 1000.0227401511396, 'langchain ArXiv Article Fetcher (open-mixtral-8x7b)': 1009.9690829209175, 'langchain Wolfram Alpha (gemini-1.5-pro-001)': 985.264542931184, 'langchain Exa Search Integration (claude-3-5-sonnet-20240620)': 998.0338181807484, 'langchain brave-search agent (gemini-1.5-flash-001)': 1000.0907651674629, 'langchain Wolfram Alpha (gpt-4-turbo-2024-04-09)': 1006.0117877645489, 'langchain Tavily Search (gpt-4-0613)': 1001.9888816905445, 'langchain Exa Search Integration (open-mixtral-8x22b)': 997.9997587960544, 'langchain PubMed Biomedical Literature Tool (gpt-4-0613)': 998.184039703399, 'llamaindex wikipedia (open-mixtral-8x22b)': 1002.1772541931362, 'langchain ArXiv Article Fetcher (llama-3.1-8B-instruct)': 986.2997661215029, 'anthropic calculator tool (claude-3-haiku-20240307)': 1009.0154713575592, 'crewai AI Crew for Game Building (gpt-4o-2024-05-13)': 1000.0, 'openai assistant code interpreter (gpt-4-0613)': 998.0325741150457, 'llamaindex wikipedia (open-mixtral-8x7b)': 982.6845157538969, 'langchain NASA Toolkit (open-mixtral-8x7b)': 1000.0108351153907, 'langchain Yahoo Finance News (gemini-1.5-flash-001)': 1001.9995029454528, 'langchain You.com Search (gpt-4-0613)': 997.7379818052741, 'llamaindex wikipedia (llama-3.1-70B-instruct)': 981.3566305449104, 'crewai AI Crew for Trip Planning (gemini-1.5-pro-001)': 998.024197338498, 'crewai AI Crew for Game Building (llama-3.1-8B-instruct)': 1001.9884872017008, 'langchain PubMed Biomedical Literature Tool (claude-3-opus-20240229)': 997.8679586525008, 'anthropic sql query (claude-3-5-sonnet-20240620)': 1004.0210379255686, 'langchain Riza Code Interpreter (llama-3.1-405B-instruct)': 998.0122876001418, 'langchain ArXiv Article Fetcher (claude-3-5-sonnet-20240620)': 990.3660143713963, 'llamaindex ArXiv Article Fetcher (gpt-4-0613)': 994.1023763734436, 'langchain Python REPL (gpt-4-turbo-2024-04-09)': 1002.0404736318369, 'langchain NASA Toolkit (mistral-large-2407)': 994.1089421513328, 'langchain PubMed Biomedical Literature Tool (llama-3.1-405B-instruct)': 992.1821692099248, 'langchain brave-search agent (llama-3.1-70B-instruct)': 1007.6495088229284, 'langchain PubMed Biomedical Literature Tool (open-mixtral-8x22b)': 1006.1310568624837, 'langchain NASA Toolkit (llama-3.1-8B-instruct)': 996.0542864549996, 'langchain Dall-E Image Generator (claude-3-haiku-20240307)': 1000.1332205246083, 'openai assistant function calling (gpt-4o-mini-2024-07-18)': 1003.9884872017008, 'langchain Wolfram Alpha (gpt-4o-2024-05-13)': 1000.1721820787907, 'llamaindex Wolfram Alpha (gpt-4-0613)': 1000.0, 'llamaindex wikipedia (gpt-4o-2024-05-13)': 994.2136528749845, 'langchain Tavily Search (mistral-large-2407)': 1003.8753578718142, 'langchain NASA Toolkit (llama-3.1-70B-instruct)': 995.9993861565185, 'crewai AI Crew for Trip Planning (mistral-large-2407)': 996.2524749859504, 'langchain Wolfram Alpha (gpt-4o-mini-2024-07-18)': 1000.0353951637118, 'langchain Shell (gpt-4o-mini-2024-07-18)': 1001.9016580089782, 'langchain NASA Toolkit (open-mixtral-8x22b)': 994.1369500195548, 'langchain Gmail Toolkit (gemini-1.5-flash-001)': 999.9885519533846, 'anthropic pdf upload summarization (claude-3-haiku-20240307)': 1002.0, 'anthropic web page reader (claude-3-opus-20240229)': 996.2549140322355, 'llamaindex Yahoo Finance News (mistral-large-2407)': 1002.0121365882807, 'openai assistant function calling (gpt-4-turbo-2024-04-09)': 994.0743541016313, 'langchain Pandas DataFrame (gemini-1.5-pro-001)': 1000.0672998350105, 'sql agent plotter langchain (llama-3.1-405B-instruct)': 996.0690981298321, 'langchain Pandas DataFrame (gemini-1.5-flash-001)': 1007.8630681552818, 'sql agent plotter langchain (gpt-4-0613)': 996.0567849535946, 'langchain Yahoo Finance News (gpt-4o-mini-2024-07-18)': 1003.9768402430805, 'llamaindex Wolfram Alpha (llama-3.1-70B-instruct)': 999.9885584278435, 'llamaindex Wolfram Alpha (gpt-4o-2024-08-06)': 1002.0456053425052, 'llamaindex wikipedia (gpt-4-turbo-2024-04-09)': 988.7188077403183, 'llamaindex wikipedia (claude-3-haiku-20240307)': 994.4908946458767, 'crewai AI Crew for Game Building (gpt-4o-2024-08-06)': 995.9727532902343, 'langchain PubMed Biomedical Literature Tool (open-mixtral-8x7b)': 1002.1626069440384, 'llamaindex wikipedia (gemini-1.5-pro-001)': 996.2947021215053, 'langchain PubMed Biomedical Literature Tool (llama-3.1-8B-instruct)': 1000.1730028405026, 'langchain Python REPL (claude-3-opus-20240229)': 999.9908580660601, 'anthropic sql query (claude-3-opus-20240229)': 1002.0409765206498, 'langchain Dall-E Image Generator (gpt-4o-mini-2024-07-18)': 1001.9887348441753, 'langchain Dall-E Image Generator (llama-3.1-70B-instruct)': 1002.0984379948956, 'langchain Riza Code Interpreter (claude-3-5-sonnet-20240620)': 1000.0226295445775, 'langchain AskNews (gpt-4o-2024-05-13)': 997.9789096964942, 'sql agent plotter langchain (llama-3.1-70B-instruct)': 996.101952592663, 'langchain YouTube Search (llama-3.1-8B-instruct)': 997.9594292531523, 'crewai AI Crew for Trip Planning (gpt-4o-2024-08-06)': 998.0052917459445, 'llamaindex Wolfram Alpha (open-mixtral-8x22b)': 998.011505495067, 'langchain YouTube Search (gpt-4o-2024-05-13)': 1000.051975231292, 'langchain PubMed Biomedical Literature Tool (mistral-large-2407)': 1003.8908855877323, 'langchain Tavily Search (gpt-4o-2024-05-13)': 1001.9598283739629, 'langchain Google Jobs (open-mixtral-8x7b)': 996.0850352605786, 'langchain AskNews (claude-3-5-sonnet-20240620)': 998.1727870164267, 'llamaindex Tavily Research Tool (open-mixtral-8x7b)': 998.0360747049585, 'langchain Tavily Search (gpt-4o-2024-08-06)': 998.1366473151247, 'langchain Golden Query Integration (open-mixtral-8x7b)': 1001.9568154848936, 'langchain Yahoo Finance News (llama-3.1-405B-instruct)': 997.9382658221006, 'langchain You.com Search (claude-3-haiku-20240307)': 998.0786193500071, 'langchain Tavily Search (llama-3.1-405B-instruct)': 1003.9557191809105, 'crewai AI Crew for Trip Planning (llama-3.1-8B-instruct)': 997.9691913317397, 'langchain You.com Search (gpt-4o-2024-05-13)': 997.991261806342, 'llamaindex OpenWeatherMap (gpt-4-turbo-2024-04-09)': 1000.0, 'langchain YouTube Search (gemini-1.5-pro-001)': 992.1103326677561, 'langchain NASA Toolkit (gpt-4o-2024-05-13)': 996.2796152619846, 'llamaindex wikipedia (gpt-4o-mini-2024-07-18)': 1002.1363112533427, 'langchain You.com Search (mistral-large-2407)': 998.0411019058449, 'langchain NASA Toolkit (gpt-4o-2024-08-06)': 998.1224014134916, 'crewai AI Crew for Trip Planning (claude-3-5-sonnet-20240620)': 995.9980580223341, 'langchain YouTube Search (open-mixtral-8x22b)': 997.9465799968386, 'crewai AI Crew for Trip Planning (gpt-4o-2024-05-13)': 998.0958064901278, 'langchain YouTube Search (mistral-large-2407)': 992.0808685612366, 'langchain AskNews (gpt-4-turbo-2024-04-09)': 1002.0426054928467, 'langchain You.com Search (gpt-4o-2024-08-06)': 1001.9422403289592, 'langchain You.com Search (claude-3-opus-20240229)': 992.171911770228, 'langchain AskNews (claude-3-opus-20240229)': 998.1041580849015, 'langchain You.com Search (llama-3.1-405B-instruct)': 1002.1261318061454, 'langchain AskNews (mistral-large-2407)': 1000.0337049005641, 'langchain Shell (claude-3-5-sonnet-20240620)': 1002.0007403105938, 'langchain Pandas DataFrame (llama-3.1-405B-instruct)': 994.3905925358777, 'llamaindex Yelp Tool (llama-3.1-70B-instruct)': 997.9802772859468, 'langchain Python REPL (open-mixtral-8x7b)': 998.0584205643875, 'llamaindex code interpreter (claude-3-5-sonnet-20240620)': 1002.0, 'langchain Shell (gpt-4o-2024-05-13)': 997.988734201633, 'langchain Python REPL (mistral-large-2407)': 1002.0, 'llamaindex code interpreter (gpt-4o-2024-05-13)': 996.0344690900129, 'langchain Python REPL (gpt-4o-mini-2024-07-18)': 1000.1675730564252, 'llamaindex code interpreter (gpt-4-turbo-2024-04-09)': 998.0115127982992, 'llamaindex Yelp Tool (llama-3.1-8B-instruct)': 997.9714543622686, 'crewai AI Crew for Trip Planning (claude-3-opus-20240229)': 997.9831106011004, 'langchain AskNews (llama-3.1-8B-instruct)': 999.99472036053, 'langchain YouTube Search (gpt-4-0613)': 995.9204048708833, 'langchain Pandas DataFrame (mistral-large-2407)': 1002.0224992950289, 'langchain Google Lens (gemini-1.5-pro-001)': 999.9886170696057, 'langchain Pandas DataFrame (llama-3.1-8B-instruct)': 999.9886888374236, 'langchain Dall-E Image Generator (gemini-1.5-pro-001)': 1000.0, 'langchain Google Lens (claude-3-5-sonnet-20240620)': 1002.0421233698046, 'langchain YouTube Search (open-mixtral-8x7b)': 997.9405180651113, 'sql agent plotter langchain (llama-3.1-8B-instruct)': 994.0573591880523, 'langchain Pandas DataFrame (open-mixtral-8x22b)': 1005.9654634995599, 'langchain Pandas DataFrame (gpt-4o-2024-05-13)': 999.9886829373268, 'langchain You.com Search (gemini-1.5-pro-001)': 999.9638666858843, 'sql agent plotter langchain (gemini-1.5-pro-001)': 1002.011316252984, 'langchain You.com Search (open-mixtral-8x7b)': 994.2602303413222, 'langchain YouTube Search (llama-3.1-70B-instruct)': 994.3100175010151, 'langchain You.com Search (gpt-4o-mini-2024-07-18)': 996.3168664485091, 'sql agent plotter langchain (open-mixtral-8x22b)': 998.0115127982992, 'langchain YouTube Search (llama-3.1-405B-instruct)': 996.2864594755418, 'langchain Google Lens (mistral-large-2407)': 999.7328576122625, 'sql agent plotter langchain (mistral-large-2407)': 998.034462090291, 'langchain Google Jobs (gemini-1.5-pro-001)': 997.9911151569931, 'llamaindex Yelp Tool (open-mixtral-8x22b)': 998.1120155405848, 'llamaindex Yelp Tool (gpt-4o-2024-05-13)': 997.8394891863526, 'langchain Google Jobs (gpt-4o-mini-2024-07-18)': 998.1155916749306, 'langchain Golden Query Integration (gpt-4o-mini-2024-07-18)': 997.8793107931291, 'langchain OpenWeatherMap (mistral-large-2407)': 998.0, 'langchain Python REPL (llama-3.1-405B-instruct)': 1002.0, 'langchain Exa Search Integration (gemini-1.5-pro-001)': 1002.0120284445119, 'langchain PubMed Biomedical Literature Tool (claude-3-5-sonnet-20240620)': 1000.197992698123, 'langchain PubMed Biomedical Literature Tool (gpt-4-turbo-2024-04-09)': 1000.2549622459006, 'anthropic pdf upload summarization (claude-3-5-sonnet-20240620)': 1001.9888151320007, 'langchain You.com Search (claude-3-5-sonnet-20240620)': 997.9929997938469, 'llamaindex ArXiv Article Fetcher (llama-3.1-8B-instruct)': 998.0560055114009, 'langchain Tavily Search (open-mixtral-8x7b)': 1000.1246165943685, 'langchain Tavily Search (gpt-4-turbo-2024-04-09)': 1002.011279474149, 'llamaindex code interpreter (gpt-4o-mini-2024-07-18)': 996.0232842172163, 'llamaindex code interpreter (gemini-1.5-pro-001)': 996.0122795848803, 'langchain GraphQL API Integration (gpt-4o-2024-05-13)': 998.0115127982992, 'langchain YouTube Search (gpt-4o-mini-2024-07-18)': 1000.0346152790598, 'langchain Golden Query Integration (gpt-4o-2024-08-06)': 997.9312426799273, 'langchain Python REPL (llama-3.1-8B-instruct)': 1001.9470348406869, 'langchain Google Lens (gpt-4o-mini-2024-07-18)': 1000.0, 'langchain Google Lens (llama-3.1-8B-instruct)': 1000.3044473640224, 'langchain Google Lens (llama-3.1-70B-instruct)': 998.0001334887187, 'sql agent plotter llamaindex (llama-3.1-8B-instruct)': 997.955653911938, 'sql agent plotter llamaindex (open-mixtral-8x7b)': 1000.0114469004035, 'sql agent plotter llamaindex (claude-3-5-sonnet-20240620)': 1003.987606910614, 'langchain Pandas DataFrame (claude-3-haiku-20240307)': 1009.8292470533806, 'sql agent plotter llamaindex (gemini-1.5-pro-001)': 1001.9886267372632, 'sql agent plotter langchain (gemini-1.5-flash-001)': 992.1254268235713, 'sql agent plotter llamaindex (mistral-large-2407)': 998.0000655187448, 'langchain Pandas DataFrame (llama-3.1-70B-instruct)': 1001.9771724253927, 'langchain Golden Query Integration (claude-3-opus-20240229)': 1001.9884209373072, 'langchain Golden Query Integration (mistral-large-2407)': 997.9499171494605, 'crewai AI Crew for Trip Planning (gemini-1.5-pro-002)': 998.0586507768909, 'llamaindex OpenWeatherMap (gemini-1.5-flash-002)': 997.9884872017008, 'langchain GraphQL API Integration (gpt-4-0613)': 1002.0, 'langchain Golden Query Integration (claude-3-haiku-20240307)': 998.0, 'langchain alpha-vantage stock agent (gemini-1.5-pro-002)': 1000.023024052571, 'llamaindex ArXiv Article Fetcher (gemini-1.5-flash-002)': 998.010493399986})\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Model</th>\n",
              "      <th>Elo rating</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>llamaindex brave-search agent (gpt-4o-2024-08-06)</td>\n",
              "      <td>1048.85</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>langchain Wikipedia (claude-3-5-sonnet-20240620)</td>\n",
              "      <td>1038.12</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>langchain Wikipedia (open-mixtral-8x22b)</td>\n",
              "      <td>1037.76</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>langchain Wikipedia (llama-3.1-70B-instruct)</td>\n",
              "      <td>1034.49</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>langchain Wikipedia (open-mixtral-8x7b)</td>\n",
              "      <td>1033.36</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>443</th>\n",
              "      <td>langchain Wolfram Alpha (gemini-1.5-flash-001)</td>\n",
              "      <td>980.81</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>444</th>\n",
              "      <td>langchain Wikipedia (llama-3.1-8B-instruct)</td>\n",
              "      <td>977.79</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>445</th>\n",
              "      <td>llamaindex wikipedia (llama-3.1-405B-instruct)</td>\n",
              "      <td>977.21</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>446</th>\n",
              "      <td>langchain Wikipedia (gemini-1.5-flash-001)</td>\n",
              "      <td>977.03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>447</th>\n",
              "      <td>openai general assistant (gpt-4o-2024-08-06)</td>\n",
              "      <td>947.47</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>447 rows × 2 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "                                                 Model  Elo rating\n",
              "1    llamaindex brave-search agent (gpt-4o-2024-08-06)     1048.85\n",
              "2     langchain Wikipedia (claude-3-5-sonnet-20240620)     1038.12\n",
              "3             langchain Wikipedia (open-mixtral-8x22b)     1037.76\n",
              "4         langchain Wikipedia (llama-3.1-70B-instruct)     1034.49\n",
              "5              langchain Wikipedia (open-mixtral-8x7b)     1033.36\n",
              "..                                                 ...         ...\n",
              "443     langchain Wolfram Alpha (gemini-1.5-flash-001)      980.81\n",
              "444        langchain Wikipedia (llama-3.1-8B-instruct)      977.79\n",
              "445     llamaindex wikipedia (llama-3.1-405B-instruct)      977.21\n",
              "446         langchain Wikipedia (gemini-1.5-flash-001)      977.03\n",
              "447       openai general assistant (gpt-4o-2024-08-06)      947.47\n",
              "\n",
              "[447 rows x 2 columns]"
            ]
          },
          "execution_count": 131,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "def preety_print_model_ratings(ratings):\n",
        "    df = pd.DataFrame([\n",
        "        [n, ratings[n]] for n in ratings.keys()\n",
        "    ], columns=[\"Model\", \"Elo rating\"]).sort_values(\"Elo rating\", ascending=False).reset_index(drop=True)\n",
        "    # df[\"Elo rating\"] = (df[\"Elo rating\"] + 0.5).astype(int)\n",
        "    df.index = df.index + 1\n",
        "    return df\n",
        "\n",
        "online_elo_ratings = compute_online_elo(ratings)\n",
        "print((online_elo_ratings))\n",
        "preety_print_model_ratings(online_elo_ratings)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 132,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 424
        },
        "id": "tcQ90su8gadr",
        "outputId": "bf321d9d-8de9-49e2-d7eb-8bdf097f450b"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Model</th>\n",
              "      <th>Elo rating</th>\n",
              "      <th>Elo rating with reverse order</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>llamaindex brave-search agent (gpt-4o-2024-08-06)</td>\n",
              "      <td>1049</td>\n",
              "      <td>1045</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>langchain Wikipedia (claude-3-5-sonnet-20240620)</td>\n",
              "      <td>1038</td>\n",
              "      <td>1041</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>langchain Wikipedia (open-mixtral-8x22b)</td>\n",
              "      <td>1038</td>\n",
              "      <td>1035</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>langchain Wikipedia (llama-3.1-70B-instruct)</td>\n",
              "      <td>1034</td>\n",
              "      <td>1036</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>langchain Wikipedia (open-mixtral-8x7b)</td>\n",
              "      <td>1033</td>\n",
              "      <td>1037</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>443</th>\n",
              "      <td>langchain Wolfram Alpha (gemini-1.5-flash-001)</td>\n",
              "      <td>981</td>\n",
              "      <td>981</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>444</th>\n",
              "      <td>langchain Wikipedia (llama-3.1-8B-instruct)</td>\n",
              "      <td>978</td>\n",
              "      <td>981</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>445</th>\n",
              "      <td>llamaindex wikipedia (llama-3.1-405B-instruct)</td>\n",
              "      <td>977</td>\n",
              "      <td>977</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>446</th>\n",
              "      <td>langchain Wikipedia (gemini-1.5-flash-001)</td>\n",
              "      <td>977</td>\n",
              "      <td>980</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>447</th>\n",
              "      <td>openai general assistant (gpt-4o-2024-08-06)</td>\n",
              "      <td>947</td>\n",
              "      <td>949</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>447 rows × 3 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "                                                 Model  Elo rating  \\\n",
              "1    llamaindex brave-search agent (gpt-4o-2024-08-06)        1049   \n",
              "2     langchain Wikipedia (claude-3-5-sonnet-20240620)        1038   \n",
              "3             langchain Wikipedia (open-mixtral-8x22b)        1038   \n",
              "4         langchain Wikipedia (llama-3.1-70B-instruct)        1034   \n",
              "5              langchain Wikipedia (open-mixtral-8x7b)        1033   \n",
              "..                                                 ...         ...   \n",
              "443     langchain Wolfram Alpha (gemini-1.5-flash-001)         981   \n",
              "444        langchain Wikipedia (llama-3.1-8B-instruct)         978   \n",
              "445     llamaindex wikipedia (llama-3.1-405B-instruct)         977   \n",
              "446         langchain Wikipedia (gemini-1.5-flash-001)         977   \n",
              "447       openai general assistant (gpt-4o-2024-08-06)         947   \n",
              "\n",
              "     Elo rating with reverse order  \n",
              "1                             1045  \n",
              "2                             1041  \n",
              "3                             1035  \n",
              "4                             1036  \n",
              "5                             1037  \n",
              "..                             ...  \n",
              "443                            981  \n",
              "444                            981  \n",
              "445                            977  \n",
              "446                            980  \n",
              "447                            949  \n",
              "\n",
              "[447 rows x 3 columns]"
            ]
          },
          "execution_count": 132,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "def preety_print_two_ratings(ratings_1, ratings_2, column_names):\n",
        "    df = pd.DataFrame([\n",
        "        [n, ratings_1[n], ratings_2[n]] for n in ratings_1.keys()\n",
        "    ], columns=[\"Model\", column_names[0], column_names[1]]).sort_values(column_names[0], ascending=False).reset_index(drop=True)\n",
        "    df[column_names[0]] = (df[column_names[0]] + 0.5).astype(int)\n",
        "    df[column_names[1]] = (df[column_names[1]] + 0.5).astype(int)\n",
        "    df.index = df.index + 1\n",
        "    return df\n",
        "\n",
        "elo_mle_ratings_reverse = compute_online_elo(ratings.iloc[::-1])\n",
        "preety_print_two_ratings(online_elo_ratings,\n",
        "                         elo_mle_ratings_reverse,\n",
        "                         column_names=[\"Elo rating\", \"Elo rating with reverse order\"])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "owTIB3K3HK6U"
      },
      "source": [
        " ### Maximum Likelihood Estimation with [Bradley-Terry model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model), (Agents)\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 133,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 493
        },
        "id": "bTUQ2nOfgsZm",
        "outputId": "c564f150-3445-4e2c-c4ce-8867bf45cfd2"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "{'Search Engines': langchain google-serper search agent (llama-3.1-405B-instruct)      1436.14\n",
              " langchain google-serper search agent (gemini-1.5-pro-001)           1362.50\n",
              " langchain brave-search agent (llama-3.1-70B-instruct)               1333.18\n",
              " langchain brave-search agent (claude-3-opus-20240229)               1259.46\n",
              " langchain You.com Search (gemini-1.5-pro-001)                       1212.61\n",
              " langchain google-serper search agent (open-mixtral-8x7b)            1190.18\n",
              " langchain You.com Search (gpt-4-turbo-2024-04-09)                   1188.46\n",
              " langchain google-serper search agent (open-mixtral-8x22b)           1184.37\n",
              " langchain brave-search agent (gemini-1.5-pro-001)                   1176.20\n",
              " langchain brave-search agent (open-mixtral-8x7b)                    1172.10\n",
              " langchain brave-search agent (gpt-4o-2024-08-06)                    1169.43\n",
              " langchain brave-search agent (gpt-4-turbo-2024-04-09)               1169.15\n",
              " llamaindex brave-search agent (gpt-4o-2024-08-06)                   1140.08\n",
              " langchain google-serper search agent (llama-3.1-8B-instruct)        1126.97\n",
              " langchain google-serper search agent (gpt-4-turbo-2024-04-09)       1124.28\n",
              " langchain google-serper search agent (claude-3-5-sonnet-20240620)   1112.98\n",
              " langchain You.com Search (gpt-4o-2024-08-06)                        1090.21\n",
              " langchain brave-search agent (llama-3.1-8B-instruct)                1086.50\n",
              " langchain brave-search agent (gemini-1.5-flash-001)                 1084.03\n",
              " langchain brave-search agent (claude-3-5-sonnet-20240620)           1083.04\n",
              " langchain google-serper search agent (gemini-1.5-flash-001)         1077.91\n",
              " langchain google-serper search agent (gpt-4o-mini-2024-07-18)       1070.84\n",
              " langchain google-serper search agent (gpt-4o-2024-08-06)            1065.48\n",
              " langchain brave-search agent (open-mixtral-8x22b)                   1061.85\n",
              " langchain brave-search agent (mistral-large-2407)                   1052.82\n",
              " langchain google-serper search agent (gpt-4o-2024-05-13)            1051.41\n",
              " langchain brave-search agent (gpt-4o-2024-05-13)                    1049.06\n",
              " langchain google-serper search agent (mistral-large-2407)           1046.21\n",
              " langchain google-serper search agent (gpt-4-0613)                   1038.94\n",
              " langchain Tavily Search (gpt-4o-2024-08-06)                         1029.65\n",
              " langchain google-serper search agent (claude-3-opus-20240229)       1015.88\n",
              " langchain brave-search agent (llama-3.1-405B-instruct)               956.61\n",
              " langchain You.com Search (gpt-4-0613)                                945.60\n",
              " langchain You.com Search (mistral-large-2407)                        944.60\n",
              " langchain brave-search agent (claude-3-haiku-20240307)               943.03\n",
              " langchain You.com Search (gpt-4o-mini-2024-07-18)                    937.79\n",
              " langchain brave-search agent (gpt-4o-mini-2024-07-18)                933.70\n",
              " langchain google-serper search agent (llama-3.1-70B-instruct)        916.79\n",
              " langchain google-serper search agent (claude-3-haiku-20240307)       915.66\n",
              " langchain You.com Search (claude-3-haiku-20240307)                   863.62\n",
              " langchain Exa Search Integration (open-mixtral-8x22b)                862.73\n",
              " langchain You.com Search (gemini-1.5-flash-001)                      801.73\n",
              " llamaindex brave-search agent (gemini-1.5-pro-001)                   767.54\n",
              " langchain Exa Search Integration (claude-3-5-sonnet-20240620)        742.35\n",
              " langchain You.com Search (claude-3-5-sonnet-20240620)                711.14\n",
              " llamaindex brave-search agent (claude-3-opus-20240229)               707.94\n",
              " llamaindex brave-search agent (open-mixtral-8x22b)                   691.90\n",
              " langchain You.com Search (open-mixtral-8x7b)                         668.69\n",
              " langchain You.com Search (gpt-4o-2024-05-13)                         614.64\n",
              " langchain Tavily Search (gemini-1.5-flash-001)                       614.64\n",
              " langchain You.com Search (open-mixtral-8x22b)                        614.64\n",
              " langchain You.com Search (claude-3-opus-20240229)                    582.77\n",
              " dtype: float64,\n",
              " 'Simple Math': langchain google-serper search agent (llama-3.1-70B-instruct)       1462.62\n",
              " langchain google-serper search agent (gpt-4o-mini-2024-07-18)       1451.97\n",
              " langchain google-serper search agent (llama-3.1-405B-instruct)      1431.57\n",
              " langchain google-serper search agent (gpt-4-turbo-2024-04-09)       1391.51\n",
              " langchain google-serper search agent (gemini-1.5-pro-001)           1333.70\n",
              " langchain google-serper search agent (open-mixtral-8x22b)           1305.06\n",
              " langchain google-serper search agent (gpt-4-0613)                   1274.74\n",
              " langchain Wolfram Alpha (open-mixtral-8x7b)                         1253.43\n",
              " langchain Wolfram Alpha (mistral-large-2407)                        1225.76\n",
              " langchain google-serper search agent (claude-3-haiku-20240307)      1185.51\n",
              " langchain Wolfram Alpha (llama-3.1-405B-instruct)                   1149.08\n",
              " langchain Wolfram Alpha (open-mixtral-8x22b)                        1130.66\n",
              " langchain You.com Search (gemini-1.5-pro-001)                       1123.58\n",
              " langchain Wolfram Alpha (claude-3-5-sonnet-20240620)                1119.90\n",
              " langchain You.com Search (gpt-4-turbo-2024-04-09)                   1118.54\n",
              " langchain Wolfram Alpha (gpt-4o-mini-2024-07-18)                    1112.10\n",
              " langchain Wolfram Alpha (llama-3.1-8B-instruct)                     1067.70\n",
              " langchain Wolfram Alpha (gpt-4-turbo-2024-04-09)                    1062.04\n",
              " anthropic calculator tool (claude-3-haiku-20240307)                 1055.76\n",
              " langchain Wolfram Alpha (llama-3.1-70B-instruct)                    1045.30\n",
              " anthropic calculator tool (claude-3-5-sonnet-20240620)              1043.29\n",
              " langchain google-serper search agent (gpt-4o-2024-08-06)            1008.77\n",
              " langchain google-serper search agent (gpt-4o-2024-05-13)             989.17\n",
              " langchain google-serper search agent (mistral-large-2407)            978.71\n",
              " anthropic calculator tool (claude-3-opus-20240229)                   966.57\n",
              " langchain google-serper search agent (llama-3.1-8B-instruct)         957.51\n",
              " langchain google-serper search agent (claude-3-opus-20240229)        953.34\n",
              " langchain Wolfram Alpha (gpt-4o-2024-08-06)                          932.60\n",
              " langchain You.com Search (mistral-large-2407)                        930.48\n",
              " langchain Wolfram Alpha (claude-3-opus-20240229)                     918.18\n",
              " langchain You.com Search (gpt-4o-2024-05-13)                         901.71\n",
              " langchain You.com Search (claude-3-5-sonnet-20240620)                897.96\n",
              " langchain Wolfram Alpha (claude-3-haiku-20240307)                    875.53\n",
              " langchain You.com Search (gpt-4o-2024-08-06)                         871.54\n",
              " langchain You.com Search (gpt-4-0613)                                864.72\n",
              " langchain Wolfram Alpha (gpt-4o-2024-05-13)                          862.01\n",
              " langchain google-serper search agent (claude-3-5-sonnet-20240620)    847.01\n",
              " langchain google-serper search agent (open-mixtral-8x7b)             835.04\n",
              " langchain You.com Search (gpt-4o-mini-2024-07-18)                    822.66\n",
              " langchain You.com Search (claude-3-haiku-20240307)                   807.94\n",
              " langchain You.com Search (open-mixtral-8x7b)                         765.97\n",
              " langchain google-serper search agent (gemini-1.5-flash-001)          747.64\n",
              " langchain Wolfram Alpha (gemini-1.5-pro-001)                         740.38\n",
              " langchain You.com Search (gemini-1.5-flash-001)                      719.91\n",
              " llamaindex Wolfram Alpha (open-mixtral-8x22b)                        705.90\n",
              " langchain You.com Search (open-mixtral-8x22b)                        646.80\n",
              " langchain Wolfram Alpha (gemini-1.5-flash-001)                       614.20\n",
              " langchain You.com Search (claude-3-opus-20240229)                    493.91\n",
              " dtype: float64,\n",
              " 'Knowledge Bases': langchain OpenWeatherMap (gpt-4o-2024-08-06)            1279.13\n",
              " langchain OpenWeatherMap (claude-3-opus-20240229)       1277.21\n",
              " langchain OpenWeatherMap (gpt-4o-mini-2024-07-18)       1277.21\n",
              " langchain OpenWeatherMap (gemini-1.5-flash-001)         1277.21\n",
              " langchain brave-search agent (llama-3.1-70B-instruct)   1229.20\n",
              "                                                           ...  \n",
              " langchain Tavily Search (gemini-1.5-flash-001)           606.06\n",
              " langchain You.com Search (open-mixtral-8x22b)            606.06\n",
              " langchain You.com Search (open-mixtral-8x7b)             591.25\n",
              " langchain You.com Search (claude-3-opus-20240229)        536.13\n",
              " llamaindex wikipedia (mistral-large-2407)                413.64\n",
              " Length: 110, dtype: float64,\n",
              " 'Math/CS Academic Search': langchain ArXiv Article Fetcher (open-mixtral-8x7b)       1379.85\n",
              " langchain ArXiv Article Fetcher (gpt-4o-2024-05-13)       1311.80\n",
              " langchain brave-search agent (llama-3.1-70B-instruct)     1241.04\n",
              " llamaindex ArXiv Article Fetcher (gpt-4o-2024-05-13)      1232.97\n",
              " langchain brave-search agent (claude-3-opus-20240229)     1226.35\n",
              "                                                             ...  \n",
              " langchain You.com Search (open-mixtral-8x7b)               605.04\n",
              " langchain ArXiv Article Fetcher (llama-3.1-8B-instruct)    549.48\n",
              " langchain You.com Search (claude-3-opus-20240229)          545.00\n",
              " langchain ArXiv Article Fetcher (mistral-large-2407)       483.58\n",
              " llamaindex wikipedia (mistral-large-2407)                  420.69\n",
              " Length: 103, dtype: float64,\n",
              " 'Code Interpreter': crewai AI Crew for Game Building (llama-3.1-8B-instruct)     1357.67\n",
              " langchain Python REPL (gpt-4o-2024-05-13)                    1277.61\n",
              " langchain Python REPL (gemini-1.5-flash-001)                 1256.31\n",
              " sql agent plotter langchain (gpt-4o-2024-05-13)              1215.91\n",
              " sql agent plotter langchain (gemini-1.5-pro-001)             1210.32\n",
              " langchain Shell (gpt-4o-mini-2024-07-18)                     1210.32\n",
              " sql agent plotter llamaindex (gpt-4o-2024-05-13)             1200.50\n",
              " langchain Python REPL (mistral-large-2407)                   1176.23\n",
              " langchain Shell (claude-3-5-sonnet-20240620)                 1161.07\n",
              " langchain Python REPL (llama-3.1-70B-instruct)               1151.31\n",
              " langchain Python REPL (claude-3-5-sonnet-20240620)           1139.59\n",
              " langchain Python REPL (gpt-4-turbo-2024-04-09)               1056.70\n",
              " openai assistant code interpreter (gpt-4o-2024-08-06)        1040.43\n",
              " sql agent plotter llamaindex (gpt-4o-2024-08-06)             1014.63\n",
              " openai assistant code interpreter (gpt-4-turbo-2024-04-09)    924.31\n",
              " llamaindex code interpreter (gpt-4-turbo-2024-04-09)          823.77\n",
              " sql agent plotter llamaindex (mistral-large-2407)             789.68\n",
              " openai general assistant (gpt-4o-2024-08-06)                  789.68\n",
              " sql agent plotter langchain (gpt-4-turbo-2024-04-09)          784.48\n",
              " sql agent plotter llamaindex (gpt-4-turbo-2024-04-09)         784.48\n",
              " llamaindex code interpreter (gpt-4o-mini-2024-07-18)          743.69\n",
              " llamaindex code interpreter (gpt-4o-2024-05-13)               722.39\n",
              " llamaindex code interpreter (gemini-1.5-pro-001)              642.64\n",
              " langchain Python REPL (gpt-4o-2024-08-06)                     526.28\n",
              " dtype: float64}"
            ]
          },
          "execution_count": 133,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "import numpy as np\n",
        "import math\n",
        "import pandas as pd\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from IPython.display import display\n",
        "\n",
        "def compute_mle_elo_by_category(df, SCALE=400, BASE=10, INIT_RATING=1000, sample_weight=None):\n",
        "    # Define new categories\n",
        "    categories = {\n",
        "        'Search Engines': { 'normal': '#c5cae9', 'hover': '#9fa8da' },\n",
        "        'Simple Math': { 'normal': '#ffd3b6', 'hover': '#ffbfa0' },\n",
        "        'Knowledge Bases': { 'normal': '#ffe0b2', 'hover': '#ffcc80' },\n",
        "        'Math/CS Academic Search': { 'normal': '#ffccbc', 'hover': '#ffab91' },\n",
        "        'Code Interpreter': { 'normal': '#a9cce3', 'hover': '#87bdd8' }\n",
        "    }\n",
        "    \n",
        "    results = {}\n",
        "\n",
        "    def category_overlap(cat_a, cat_b):\n",
        "        \"\"\"Check if there is any overlap between two category lists.\"\"\"\n",
        "        return len(set(cat_a).intersection(set(cat_b))) > 0\n",
        "\n",
        "    for category in categories.keys():\n",
        "        # Filter DataFrame to get all battles where Agent_A and Agent_B share any category with the current category\n",
        "        category_df = df[\n",
        "            df.apply(lambda x: category_overlap(x['Agent_A']['Category'], [category]) and\n",
        "                               category_overlap(x['Agent_B']['Category'], [category]), axis=1)\n",
        "        ]\n",
        "\n",
        "        if len(category_df) == 0:\n",
        "            continue  # Skip categories with no battles\n",
        "\n",
        "        # Create a DataFrame with agent names and ratings for this category\n",
        "        battle_df = pd.DataFrame({\n",
        "            'leftAgent': category_df['Agent_A'].apply(lambda x: x['Agent name']),\n",
        "            'rightAgent': category_df['Agent_B'].apply(lambda x: x['Agent name']),\n",
        "            'rating': category_df['Rating']\n",
        "        })\n",
        "\n",
        "        # Create win/tie/loss pivot tables\n",
        "        ptbl_a_win = pd.pivot_table(\n",
        "            battle_df[battle_df[\"rating\"] == \"A is better\"],\n",
        "            index=\"leftAgent\",\n",
        "            columns=\"rightAgent\",\n",
        "            aggfunc=\"size\",\n",
        "            fill_value=0,\n",
        "        )\n",
        "        all_agents = pd.Index(ptbl_a_win.index.union(ptbl_a_win.columns))\n",
        "\n",
        "        ptbl_a_win = ptbl_a_win.reindex(index=all_agents, columns=all_agents, fill_value=0)\n",
        "\n",
        "        if sum(battle_df[\"rating\"].isin([\"Tie\", \"Both are bad\"])) == 0:\n",
        "            ptbl_tie = pd.DataFrame(0, index=ptbl_a_win.index, columns=ptbl_a_win.columns)\n",
        "        else:\n",
        "            ptbl_tie = pd.pivot_table(\n",
        "                battle_df[battle_df[\"rating\"].isin([\"Tie\", \"Both are bad\"])],\n",
        "                index=\"leftAgent\",\n",
        "                columns=\"rightAgent\",\n",
        "                aggfunc=\"size\",\n",
        "                fill_value=0,\n",
        "            )\n",
        "            ptbl_tie = ptbl_tie.reindex(index=all_agents, columns=all_agents, fill_value=0)\n",
        "            ptbl_tie = ptbl_tie + ptbl_tie.T\n",
        "\n",
        "        ptbl_b_win = pd.pivot_table(\n",
        "            battle_df[battle_df[\"rating\"] == \"B is better\"],\n",
        "            index=\"leftAgent\",\n",
        "            columns=\"rightAgent\",\n",
        "            aggfunc=\"size\",\n",
        "            fill_value=0,\n",
        "        )\n",
        "        ptbl_b_win = ptbl_b_win.reindex(index=all_agents, columns=all_agents, fill_value=0)\n",
        "\n",
        "        ptbl_win = ptbl_a_win * 2 + ptbl_b_win.T * 2 + ptbl_tie\n",
        "\n",
        "        agents = pd.Series(np.arange(len(ptbl_win.index)), index=ptbl_win.index)\n",
        "\n",
        "        p = len(agents)\n",
        "        X = np.zeros([p * (p - 1) * 2, p])\n",
        "        Y = np.zeros(p * (p - 1) * 2)\n",
        "\n",
        "        cur_row = 0\n",
        "        sample_weights = []\n",
        "        for m_a in ptbl_win.index:\n",
        "            for m_b in ptbl_win.columns:\n",
        "                if m_a == m_b:\n",
        "                    continue\n",
        "                if math.isnan(ptbl_win.loc[m_a, m_b]) or math.isnan(ptbl_win.loc[m_b, m_a]):\n",
        "                    continue\n",
        "                X[cur_row, agents[m_a]] = +math.log(BASE)\n",
        "                X[cur_row, agents[m_b]] = -math.log(BASE)\n",
        "                Y[cur_row] = 1.0\n",
        "                sample_weights.append(ptbl_win.loc[m_a, m_b])\n",
        "\n",
        "                X[cur_row + 1, agents[m_a]] = math.log(BASE)\n",
        "                X[cur_row + 1, agents[m_b]] = -math.log(BASE)\n",
        "                Y[cur_row + 1] = 0.0\n",
        "                sample_weights.append(ptbl_win.loc[m_b, m_a])\n",
        "                cur_row += 2\n",
        "        X = X[:cur_row]\n",
        "        Y = Y[:cur_row]\n",
        "\n",
        "        if len(X) > 0:\n",
        "            lr = LogisticRegression(fit_intercept=False, penalty='l2', C=0.7, tol=1e-6)\n",
        "            lr.fit(X, Y, sample_weight=sample_weights)\n",
        "            elo_scores = SCALE * lr.coef_[0] + INIT_RATING\n",
        "            results[category] = pd.Series(elo_scores, index=agents.index).sort_values(ascending=False)\n",
        "        else:\n",
        "            results[category] = pd.Series(dtype=float)  # Empty series for categories with no valid battles\n",
        "\n",
        "    return results\n",
        "\n",
        "# Function to write MLE ELO ratings to a text file\n",
        "def write_elo_ratings_to_file(ratings_by_category, filename=\"elo_ratings_by_category.txt\"):\n",
        "    with open(filename, \"w\") as file:\n",
        "        for category, ratings in ratings_by_category.items():\n",
        "            file.write(f\"ELO Ratings for {category} category:\\n\")\n",
        "            for agent, rating in ratings.items():\n",
        "                file.write(f\"{agent}: {rating}\\n\")\n",
        "            file.write(\"\\n\")\n",
        "\n",
        "# Example usage:\n",
        "mle_elo_ratings_by_category = compute_mle_elo_by_category(ratings)\n",
        "write_elo_ratings_to_file(mle_elo_ratings_by_category)\n",
        "mle_elo_ratings_by_category\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZuTV9GuQFqJe"
      },
      "source": [
        " ### Maximum Likelihood Estimation with [Bradley-Terry model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model) (Models)\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 134,
      "metadata": {
        "id": "QTa5A9QpG9bW"
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import math\n",
        "import pandas as pd\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from IPython.display import display\n",
        "\n",
        "\n",
        "def compute_mle_elo_models(df, SCALE=400, BASE=10, INIT_RATING=1000, sample_weight=None):\n",
        "    ptbl_a_win = pd.pivot_table(\n",
        "        df[df[\"rating\"] == \"A is better\"],\n",
        "        index=\"leftSubcomponent\",\n",
        "        columns=\"rightSubcomponent\",\n",
        "        aggfunc=\"size\",\n",
        "        fill_value=0,\n",
        "    )\n",
        "    all_models = pd.Index(ptbl_a_win.index.union(ptbl_a_win.columns))\n",
        "\n",
        "    # Reindex the pivot table to be square, filling missing values with 0\n",
        "    print(ptbl_a_win.shape)\n",
        "\n",
        "    ptbl_a_win = ptbl_a_win.reindex(index=all_models, columns=all_models, fill_value=0)\n",
        "    print(ptbl_a_win.shape)\n",
        "\n",
        "\n",
        "    # Handle the case where there are no ties\n",
        "    if sum(df[\"rating\"].isin([\"Tie\", \"Both are bad\"])) == 0:\n",
        "        ptbl_tie = pd.DataFrame(0, index=ptbl_a_win.index, columns=ptbl_a_win.columns)\n",
        "    else:\n",
        "        ptbl_tie = pd.pivot_table(\n",
        "            df[df[\"rating\"].isin([\"Tie\", \"Both are bad\"])],\n",
        "            index=\"leftSubcomponent\",\n",
        "            columns=\"rightSubcomponent\",\n",
        "            aggfunc=\"size\",\n",
        "            fill_value=0,\n",
        "        )\n",
        "        # Get all unique models from both leftAgent and rightAgent columns\n",
        "        all_models = pd.Index(ptbl_tie.index.union(ptbl_tie.columns))\n",
        "\n",
        "        # Reindex the pivot table to be square, filling missing values with 0\n",
        "        ptbl_tie = ptbl_tie.reindex(index=all_models, columns=all_models, fill_value=0)\n",
        "        ptbl_tie = ptbl_tie + ptbl_tie.T\n",
        "\n",
        "    ptbl_b_win = pd.pivot_table(\n",
        "        df[df[\"rating\"] == \"B is better\"],\n",
        "        index=\"leftSubcomponent\",\n",
        "        columns=\"rightSubcomponent\",\n",
        "        aggfunc=\"size\",\n",
        "        fill_value=0,\n",
        "    )\n",
        "    print(ptbl_b_win.shape)\n",
        "\n",
        "    all_models = pd.Index(ptbl_b_win.index.union(ptbl_b_win.columns))\n",
        "\n",
        "    # Reindex the pivot table to be square, filling missing values with 0\n",
        "    ptbl_b_win = ptbl_b_win.reindex(index=all_models, columns=all_models, fill_value=0)\n",
        "    print(ptbl_b_win.shape)\n",
        "\n",
        "    # Combine pivot tables\n",
        "    ptbl_win = ptbl_a_win * 2 + ptbl_b_win.T * 2 + ptbl_tie\n",
        "\n",
        "    # Visualize the combined pivot table\n",
        "    # visualize_ptbl_win(ptbl_win)\n",
        "    models = pd.Series(np.arange(len(ptbl_win.index)), index=ptbl_win.index)\n",
        "\n",
        "    p = len(models)\n",
        "    X = np.zeros([p * (p - 1) * 2, p])\n",
        "    Y = np.zeros(p * (p - 1) * 2)\n",
        "\n",
        "    cur_row = 0\n",
        "    sample_weights = []\n",
        "    for m_a in ptbl_win.index:\n",
        "        for m_b in ptbl_win.columns:\n",
        "            if m_a == m_b:\n",
        "                continue\n",
        "            # Skip if NaN\n",
        "            if math.isnan(ptbl_win.loc[m_a, m_b]) or math.isnan(ptbl_win.loc[m_b, m_a]):\n",
        "                continue\n",
        "            X[cur_row, models[m_a]] = +math.log(BASE)\n",
        "            X[cur_row, models[m_b]] = -math.log(BASE)\n",
        "            Y[cur_row] = 1.0\n",
        "            sample_weights.append(ptbl_win.loc[m_a, m_b])\n",
        "\n",
        "            X[cur_row + 1, models[m_a]] = math.log(BASE)\n",
        "            X[cur_row + 1, models[m_b]] = -math.log(BASE)\n",
        "            Y[cur_row + 1] = 0.0\n",
        "            sample_weights.append(ptbl_win.loc[m_b, m_a])\n",
        "            cur_row += 2\n",
        "    X = X[:cur_row]\n",
        "    Y = Y[:cur_row]\n",
        "\n",
        "    lr = LogisticRegression(fit_intercept=False, penalty=None, tol=1e-6)\n",
        "    lr.fit(X, Y, sample_weight=sample_weights)\n",
        "    elo_scores = SCALE * lr.coef_[0] + INIT_RATING\n",
        "    return pd.Series(elo_scores, index=models.index).sort_values(ascending=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 135,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 621
        },
        "id": "PB1TAGJUHwGD",
        "outputId": "77dc22e5-953e-4e26-d45b-861c67de0137"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "(16, 16)\n",
            "(16, 16)\n",
            "(16, 16)\n",
            "(16, 16)\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Model</th>\n",
              "      <th>Elo rating</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>open-mixtral-8x7b</td>\n",
              "      <td>1056.02</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>llama-3.1-70B-instruct</td>\n",
              "      <td>1049.77</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>open-mixtral-8x22b</td>\n",
              "      <td>1048.15</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>claude-3-5-sonnet-20240620</td>\n",
              "      <td>1037.36</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>claude-3-opus-20240229</td>\n",
              "      <td>1025.12</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>gpt-4o-2024-05-13</td>\n",
              "      <td>1020.90</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>gpt-4o-2024-08-06</td>\n",
              "      <td>1017.88</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>gpt-4o-mini-2024-07-18</td>\n",
              "      <td>1014.26</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>gpt-4-turbo-2024-04-09</td>\n",
              "      <td>1011.87</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>claude-3-haiku-20240307</td>\n",
              "      <td>997.59</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>llama-3.1-405B-instruct</td>\n",
              "      <td>989.31</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>gpt-4-0613</td>\n",
              "      <td>969.58</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>mistral-large-2407</td>\n",
              "      <td>966.64</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>gemini-1.5-pro-001</td>\n",
              "      <td>962.13</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>gemini-1.5-flash-001</td>\n",
              "      <td>945.94</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>llama-3.1-8B-instruct</td>\n",
              "      <td>887.48</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                         Model  Elo rating\n",
              "1            open-mixtral-8x7b     1056.02\n",
              "2       llama-3.1-70B-instruct     1049.77\n",
              "3           open-mixtral-8x22b     1048.15\n",
              "4   claude-3-5-sonnet-20240620     1037.36\n",
              "5       claude-3-opus-20240229     1025.12\n",
              "6            gpt-4o-2024-05-13     1020.90\n",
              "7            gpt-4o-2024-08-06     1017.88\n",
              "8       gpt-4o-mini-2024-07-18     1014.26\n",
              "9       gpt-4-turbo-2024-04-09     1011.87\n",
              "10     claude-3-haiku-20240307      997.59\n",
              "11     llama-3.1-405B-instruct      989.31\n",
              "12                  gpt-4-0613      969.58\n",
              "13          mistral-large-2407      966.64\n",
              "14          gemini-1.5-pro-001      962.13\n",
              "15        gemini-1.5-flash-001      945.94\n",
              "16       llama-3.1-8B-instruct      887.48"
            ]
          },
          "execution_count": 135,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "model_rankings = compute_mle_elo_models(modelratings)\n",
        "preety_print_model_ratings(model_rankings)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "4a0lMksEH-NY"
      },
      "source": [
        " ### Maximum Likelihood Estimation with [Bradley-Terry model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model) (Tools)\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 136,
      "metadata": {
        "id": "DtrCkZFLIDo4"
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import math\n",
        "import pandas as pd\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from IPython.display import display\n",
        "\n",
        "\n",
        "def compute_mle_elo_tools(df, SCALE=400, BASE=10, INIT_RATING=1000, sample_weight=None):\n",
        "    ptbl_a_win = pd.pivot_table(\n",
        "        df[df[\"rating\"] == \"A is better\"],\n",
        "        index=\"leftSubcomponent\",\n",
        "        columns=\"rightSubcomponent\",\n",
        "        aggfunc=\"size\",\n",
        "        fill_value=0,\n",
        "    )\n",
        "    all_tools = pd.Index(ptbl_a_win.index.union(ptbl_a_win.columns))\n",
        "\n",
        "    # Reindex the pivot table to be square, filling missing values with 0\n",
        "    print(ptbl_a_win.shape)\n",
        "\n",
        "    ptbl_a_win = ptbl_a_win.reindex(index=all_tools, columns=all_tools, fill_value=0)\n",
        "    print(ptbl_a_win.shape)\n",
        "\n",
        "\n",
        "\n",
        "    # Handle the case where there are no ties\n",
        "    if sum(df[\"rating\"].isin([\"Tie\", \"Both are bad\"])) == 0:\n",
        "        ptbl_tie = pd.DataFrame(0, index=ptbl_a_win.index, columns=ptbl_a_win.columns)\n",
        "    else:\n",
        "        ptbl_tie = pd.pivot_table(\n",
        "            df[df[\"rating\"].isin([\"Tie\", \"Both are bad\"])],\n",
        "            index=\"leftSubcomponent\",\n",
        "            columns=\"rightSubcomponent\",\n",
        "            aggfunc=\"size\",\n",
        "            fill_value=0,\n",
        "        )\n",
        "        # Get all unique tools from both leftAgent and rightAgent columns\n",
        "        all_tools = pd.Index(ptbl_tie.index.union(ptbl_tie.columns))\n",
        "\n",
        "        # Reindex the pivot table to be square, filling missing values with 0\n",
        "        ptbl_tie = ptbl_tie.reindex(index=all_tools, columns=all_tools, fill_value=0)\n",
        "        ptbl_tie = ptbl_tie + ptbl_tie.T\n",
        "\n",
        "    ptbl_b_win = pd.pivot_table(\n",
        "        df[df[\"rating\"] == \"B is better\"],\n",
        "        index=\"leftSubcomponent\",\n",
        "        columns=\"rightSubcomponent\",\n",
        "        aggfunc=\"size\",\n",
        "        fill_value=0,\n",
        "    )\n",
        "    print(ptbl_b_win.shape)\n",
        "\n",
        "    all_tools = pd.Index(ptbl_b_win.index.union(ptbl_b_win.columns))\n",
        "\n",
        "    # Reindex the pivot table to be square, filling missing values with 0\n",
        "    ptbl_b_win = ptbl_b_win.reindex(index=all_tools, columns=all_tools, fill_value=0)\n",
        "    print(ptbl_b_win.shape)\n",
        "\n",
        "    # Combine pivot tables\n",
        "    ptbl_win = ptbl_a_win * 2 + ptbl_b_win.T * 2 + ptbl_tie\n",
        "\n",
        "    # Visualize the combined pivot table\n",
        "    # visualize_ptbl_win(ptbl_win)\n",
        "    tools = pd.Series(np.arange(len(ptbl_win.index)), index=ptbl_win.index)\n",
        "\n",
        "    p = len(tools)\n",
        "    X = np.zeros([p * (p - 1) * 2, p])\n",
        "    Y = np.zeros(p * (p - 1) * 2)\n",
        "\n",
        "    cur_row = 0\n",
        "    sample_weights = []\n",
        "    for m_a in ptbl_win.index:\n",
        "        for m_b in ptbl_win.columns:\n",
        "            if m_a == m_b:\n",
        "                continue\n",
        "            # Skip if NaN\n",
        "            if math.isnan(ptbl_win.loc[m_a, m_b]) or math.isnan(ptbl_win.loc[m_b, m_a]):\n",
        "                continue\n",
        "            X[cur_row, tools[m_a]] = +math.log(BASE)\n",
        "            X[cur_row, tools[m_b]] = -math.log(BASE)\n",
        "            Y[cur_row] = 1.0\n",
        "            sample_weights.append(ptbl_win.loc[m_a, m_b])\n",
        "\n",
        "            X[cur_row + 1, tools[m_a]] = math.log(BASE)\n",
        "            X[cur_row + 1, tools[m_b]] = -math.log(BASE)\n",
        "            Y[cur_row + 1] = 0.0\n",
        "            sample_weights.append(ptbl_win.loc[m_b, m_a])\n",
        "            cur_row += 2\n",
        "    X = X[:cur_row]\n",
        "    Y = Y[:cur_row]\n",
        "\n",
        "    lr = LogisticRegression(fit_intercept=False, penalty=None, tol=1e-6)\n",
        "    lr.fit(X, Y, sample_weight=sample_weights)\n",
        "    elo_scores = SCALE * lr.coef_[0] + INIT_RATING\n",
        "    return pd.Series(elo_scores, index=tools.index).sort_values(ascending=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 137,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "N8PVUAqVIFU0",
        "outputId": "2d4a3bd3-7bbe-4155-c625-c19d051d555e"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "(26, 33)\n",
            "(35, 35)\n",
            "(26, 29)\n",
            "(32, 32)\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Model</th>\n",
              "      <th>Elo rating</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>tavily-search</td>\n",
              "      <td>1222.21</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>pandas</td>\n",
              "      <td>1188.21</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>brave-search</td>\n",
              "      <td>1154.33</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>asknews</td>\n",
              "      <td>1119.67</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>google-serper</td>\n",
              "      <td>1108.85</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>pubmed</td>\n",
              "      <td>1100.57</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>arxiv</td>\n",
              "      <td>1095.66</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>alpha-vantage</td>\n",
              "      <td>1095.19</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>wikipedia</td>\n",
              "      <td>1094.33</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>dall-e</td>\n",
              "      <td>1068.83</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>calculator</td>\n",
              "      <td>1062.63</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>yahoo-finance-news</td>\n",
              "      <td>1045.44</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>google-lens</td>\n",
              "      <td>1030.86</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>wolfram-alpha</td>\n",
              "      <td>1007.87</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>python-repl</td>\n",
              "      <td>1006.55</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>yelp</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>exa-search</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>gmail</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>riza-code-interpreter</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20</th>\n",
              "      <td>golden-query</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>21</th>\n",
              "      <td>llamaindex-code-interpreter</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>22</th>\n",
              "      <td>graphql</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>23</th>\n",
              "      <td>PDFReader</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>24</th>\n",
              "      <td>shell</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25</th>\n",
              "      <td>openapi</td>\n",
              "      <td>1000.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>26</th>\n",
              "      <td>requests</td>\n",
              "      <td>981.31</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>27</th>\n",
              "      <td>youtube</td>\n",
              "      <td>957.49</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>28</th>\n",
              "      <td>tasks</td>\n",
              "      <td>951.32</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>29</th>\n",
              "      <td>you-search</td>\n",
              "      <td>940.40</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>30</th>\n",
              "      <td>custom-functions</td>\n",
              "      <td>915.91</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>31</th>\n",
              "      <td>sql</td>\n",
              "      <td>912.23</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>32</th>\n",
              "      <td>open-weather-map</td>\n",
              "      <td>878.07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>33</th>\n",
              "      <td>openai-code-interpreter</td>\n",
              "      <td>874.47</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>34</th>\n",
              "      <td>nasa</td>\n",
              "      <td>855.19</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>35</th>\n",
              "      <td>file-search</td>\n",
              "      <td>843.04</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>36</th>\n",
              "      <td>eden-ai</td>\n",
              "      <td>783.85</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>37</th>\n",
              "      <td>google-jobs</td>\n",
              "      <td>705.51</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                          Model  Elo rating\n",
              "1                 tavily-search     1222.21\n",
              "2                        pandas     1188.21\n",
              "3                  brave-search     1154.33\n",
              "4                       asknews     1119.67\n",
              "5                 google-serper     1108.85\n",
              "6                        pubmed     1100.57\n",
              "7                         arxiv     1095.66\n",
              "8                 alpha-vantage     1095.19\n",
              "9                     wikipedia     1094.33\n",
              "10                       dall-e     1068.83\n",
              "11                   calculator     1062.63\n",
              "12           yahoo-finance-news     1045.44\n",
              "13                  google-lens     1030.86\n",
              "14                wolfram-alpha     1007.87\n",
              "15                  python-repl     1006.55\n",
              "16                         yelp     1000.00\n",
              "17                   exa-search     1000.00\n",
              "18                        gmail     1000.00\n",
              "19        riza-code-interpreter     1000.00\n",
              "20                 golden-query     1000.00\n",
              "21  llamaindex-code-interpreter     1000.00\n",
              "22                      graphql     1000.00\n",
              "23                    PDFReader     1000.00\n",
              "24                        shell     1000.00\n",
              "25                      openapi     1000.00\n",
              "26                     requests      981.31\n",
              "27                      youtube      957.49\n",
              "28                        tasks      951.32\n",
              "29                   you-search      940.40\n",
              "30             custom-functions      915.91\n",
              "31                          sql      912.23\n",
              "32             open-weather-map      878.07\n",
              "33      openai-code-interpreter      874.47\n",
              "34                         nasa      855.19\n",
              "35                  file-search      843.04\n",
              "36                      eden-ai      783.85\n",
              "37                  google-jobs      705.51"
            ]
          },
          "execution_count": 137,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "tool_rankings = compute_mle_elo_tools(toolratings)\n",
        "preety_print_model_ratings(tool_rankings)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5iQ_flv_JJcl"
      },
      "source": [
        " ### Maximum Likelihood Estimation with [Bradley-Terry model](https://en.wikipedia.org/wiki/Bradley%E2%80%93Terry_model) (Frameworks)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 138,
      "metadata": {
        "id": "dkItl6kaJSYK"
      },
      "outputs": [],
      "source": [
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import math\n",
        "import pandas as pd\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from IPython.display import display\n",
        "\n",
        "\n",
        "def compute_mle_elo_frameworks(df, SCALE=400, BASE=10, INIT_RATING=1000, sample_weight=None):\n",
        "    ptbl_a_win = pd.pivot_table(\n",
        "        df[df[\"rating\"] == \"A is better\"],\n",
        "        index=\"leftSubcomponent\",\n",
        "        columns=\"rightSubcomponent\",\n",
        "        aggfunc=\"size\",\n",
        "        fill_value=0,\n",
        "    )\n",
        "    all_frameworks = pd.Index(ptbl_a_win.index.union(ptbl_a_win.columns))\n",
        "\n",
        "    # Reindex the pivot table to be square, filling missing values with 0\n",
        "    print(ptbl_a_win.shape)\n",
        "\n",
        "    ptbl_a_win = ptbl_a_win.reindex(index=all_frameworks, columns=all_frameworks, fill_value=0)\n",
        "    print(ptbl_a_win.shape)\n",
        "\n",
        "    all_frameworks = pd.Index(ptbl_a_win.index.union(ptbl_a_win.columns))\n",
        "\n",
        "\n",
        "    # Handle the case where there are no ties\n",
        "    if sum(df[\"rating\"].isin([\"Tie\", \"Both are bad\"])) == 0:\n",
        "        ptbl_tie = pd.DataFrame(0, index=ptbl_a_win.index, columns=ptbl_a_win.columns)\n",
        "    else:\n",
        "        ptbl_tie = pd.pivot_table(\n",
        "            df[df[\"rating\"].isin([\"Tie\", \"Both are bad\"])],\n",
        "            index=\"leftSubcomponent\",\n",
        "            columns=\"rightSubcomponent\",\n",
        "            aggfunc=\"size\",\n",
        "            fill_value=0,\n",
        "        )\n",
        "        # Get all unique tools from both leftAgent and rightAgent columns\n",
        "        all_frameworks = pd.Index(ptbl_tie.index.union(ptbl_tie.columns))\n",
        "\n",
        "        # Reindex the pivot table to be square, filling missing values with 0\n",
        "        ptbl_tie = ptbl_tie.reindex(index=all_frameworks, columns=all_frameworks, fill_value=0)\n",
        "        ptbl_tie = ptbl_tie + ptbl_tie.T\n",
        "\n",
        "    ptbl_b_win = pd.pivot_table(\n",
        "        df[df[\"rating\"] == \"B is better\"],\n",
        "        index=\"leftSubcomponent\",\n",
        "        columns=\"rightSubcomponent\",\n",
        "        aggfunc=\"size\",\n",
        "        fill_value=0,\n",
        "    )\n",
        "    print(ptbl_b_win.shape)\n",
        "\n",
        "    all_frameworks = pd.Index(ptbl_b_win.index.union(ptbl_b_win.columns))\n",
        "\n",
        "    # Reindex the pivot table to be square, filling missing values with 0\n",
        "    ptbl_b_win = ptbl_b_win.reindex(index=all_frameworks, columns=all_frameworks, fill_value=0)\n",
        "    print(ptbl_b_win.shape)\n",
        "\n",
        "    # Combine pivot tables\n",
        "    ptbl_win = ptbl_a_win * 2 + ptbl_b_win.T * 2 + ptbl_tie\n",
        "\n",
        "    # Visualize the combined pivot table\n",
        "    # visualize_ptbl_win(ptbl_win)\n",
        "    frameworks = pd.Series(np.arange(len(ptbl_win.index)), index=ptbl_win.index)\n",
        "\n",
        "    p = len(frameworks)\n",
        "    X = np.zeros([p * (p - 1) * 2, p])\n",
        "    Y = np.zeros(p * (p - 1) * 2)\n",
        "\n",
        "    cur_row = 0\n",
        "    sample_weights = []\n",
        "    for m_a in ptbl_win.index:\n",
        "        for m_b in ptbl_win.columns:\n",
        "            if m_a == m_b:\n",
        "                continue\n",
        "            # Skip if NaN\n",
        "            if math.isnan(ptbl_win.loc[m_a, m_b]) or math.isnan(ptbl_win.loc[m_b, m_a]):\n",
        "                continue\n",
        "            X[cur_row, frameworks[m_a]] = +math.log(BASE)\n",
        "            X[cur_row, frameworks[m_b]] = -math.log(BASE)\n",
        "            Y[cur_row] = 1.0\n",
        "            sample_weights.append(ptbl_win.loc[m_a, m_b])\n",
        "\n",
        "            X[cur_row + 1, frameworks[m_a]] = math.log(BASE)\n",
        "            X[cur_row + 1, frameworks[m_b]] = -math.log(BASE)\n",
        "            Y[cur_row + 1] = 0.0\n",
        "            sample_weights.append(ptbl_win.loc[m_b, m_a])\n",
        "            cur_row += 2\n",
        "    X = X[:cur_row]\n",
        "    Y = Y[:cur_row]\n",
        "\n",
        "    lr = LogisticRegression(fit_intercept=False, penalty=None, tol=1e-6)\n",
        "    lr.fit(X, Y, sample_weight=sample_weights)\n",
        "    elo_scores = SCALE * lr.coef_[0] + INIT_RATING\n",
        "    return pd.Series(elo_scores, index=frameworks.index).sort_values(ascending=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 139,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 307
        },
        "id": "xfq0MdxtKxc8",
        "outputId": "29bf7e71-c4a5-45bf-9bff-f05228c423ef"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "(6, 6)\n",
            "(6, 6)\n",
            "(5, 6)\n",
            "(6, 6)\n"
          ]
        },
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Model</th>\n",
              "      <th>Elo rating</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>langchain</td>\n",
              "      <td>1123.10</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>anthropic tool use</td>\n",
              "      <td>1117.37</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>llamaindex</td>\n",
              "      <td>1040.58</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>openai assistants</td>\n",
              "      <td>978.60</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>crewai</td>\n",
              "      <td>915.86</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>composio</td>\n",
              "      <td>824.48</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "                Model  Elo rating\n",
              "1           langchain     1123.10\n",
              "2  anthropic tool use     1117.37\n",
              "3          llamaindex     1040.58\n",
              "4   openai assistants      978.60\n",
              "5              crewai      915.86\n",
              "6            composio      824.48"
            ]
          },
          "execution_count": 139,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "framework_rankings = compute_mle_elo_frameworks(frameworkratings)\n",
        "preety_print_model_ratings(framework_rankings)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "HKRqfShekNe2"
      },
      "source": [
        "# ELO Rating Combined"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "U8SorNnmzVNf"
      },
      "source": [
        "To improve the combined subcomponent ratings, we should create a larger feature matrix where each row represents a battle and each column represents the combination of models, tools, and frameworks used. This will allow us to account for confounding factors between frequently co-occurring components (e.g., a tool frequently paired with a stronger model) and ensure we make full use of the data, even in battles where models are the same but tools differ. This approach will yield more accurate estimates by considering all component combinations in a unified framework.\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 140,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "E3hwMbwzlt5Q",
        "outputId": "c6458a0b-3f99-488b-d726-9210816a270d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "                                          leftAgent  \\\n",
            "0  langchain brave-search agent (gpt-4o-2024-08-06)   \n",
            "1  langchain brave-search agent (gpt-4o-2024-08-06)   \n",
            "2  langchain brave-search agent (gpt-4o-2024-08-06)   \n",
            "3  langchain brave-search agent (gpt-4o-2024-08-06)   \n",
            "4   sql agent plotter langchain (gpt-4o-2024-08-06)   \n",
            "\n",
            "                                          rightAgent    tools_left  \\\n",
            "0  langchain google-serper search agent (gpt-4o-2...  brave-search   \n",
            "1  langchain google-serper search agent (gpt-4o-2...  brave-search   \n",
            "2  langchain google-serper search agent (gpt-4o-2...  brave-search   \n",
            "3  langchain google-serper search agent (gpt-4o-2...  brave-search   \n",
            "4  langchain ArXiv Article Fetcher (gpt-4o-2024-0...           sql   \n",
            "\n",
            "     tools_right        models_left       models_right frameworks_left  \\\n",
            "0  google-serper  gpt-4o-2024-08-06  gpt-4o-2024-08-06       langchain   \n",
            "1  google-serper  gpt-4o-2024-08-06  gpt-4o-2024-08-06       langchain   \n",
            "2  google-serper  gpt-4o-2024-08-06  gpt-4o-2024-08-06       langchain   \n",
            "3  google-serper  gpt-4o-2024-08-06  gpt-4o-2024-08-06       langchain   \n",
            "4          arxiv  gpt-4o-2024-08-06  gpt-4o-2024-08-06       langchain   \n",
            "\n",
            "  frameworks_right       rating  \n",
            "0        langchain  A is better  \n",
            "1        langchain  A is better  \n",
            "2        langchain  A is better  \n",
            "3        langchain  B is better  \n",
            "4        langchain  B is better  \n",
            "Number of unique agents: 447\n"
          ]
        }
      ],
      "source": [
        "def extract_agent_info(agent_data):\n",
        "    return {\n",
        "        'name': agent_data['Agent name'],\n",
        "        'frameworks': agent_data['Frameworks'],\n",
        "        'models': agent_data['Models'],\n",
        "        'tools': agent_data['Tools']\n",
        "    }\n",
        "\n",
        "# Extract information for Agent A and Agent B\n",
        "ratings['Agent_A_info'] = ratings['Agent_A'].apply(extract_agent_info)\n",
        "ratings['Agent_B_info'] = ratings['Agent_B'].apply(extract_agent_info)\n",
        "\n",
        "# Prepare the ratings data for Elo calculation\n",
        "def prepare_ratings_data(ratings_df):\n",
        "    # Create a new DataFrame with the required structure\n",
        "    prepared_df = pd.DataFrame({\n",
        "        'leftAgent': ratings_df['Agent_A'].apply(lambda x: x['Agent name']),\n",
        "        'rightAgent': ratings_df['Agent_B'].apply(lambda x: x['Agent name']),\n",
        "        'tools_left': ratings_df['Agent_A_info'].apply(lambda x: x['tools']),\n",
        "        'tools_right': ratings_df['Agent_B_info'].apply(lambda x: x['tools']),\n",
        "        'models_left': ratings_df['Agent_A_info'].apply(lambda x: x['models']),\n",
        "        'models_right': ratings_df['Agent_B_info'].apply(lambda x: x['models']),\n",
        "        'frameworks_left': ratings_df['Agent_A_info'].apply(lambda x: x['frameworks']),\n",
        "        'frameworks_right': ratings_df['Agent_B_info'].apply(lambda x: x['frameworks']),\n",
        "        'rating': ratings_df['Rating']\n",
        "    })\n",
        "\n",
        "    # Instead of exploding, we'll create a single row for each battle\n",
        "    prepared_df['tools_left'] = prepared_df['tools_left'].apply(lambda x: x[0] if x else None)\n",
        "    prepared_df['tools_right'] = prepared_df['tools_right'].apply(lambda x: x[0] if x else None)\n",
        "    prepared_df['models_left'] = prepared_df['models_left'].apply(lambda x: x[0] if x else None)\n",
        "    prepared_df['models_right'] = prepared_df['models_right'].apply(lambda x: x[0] if x else None)\n",
        "    prepared_df['frameworks_left'] = prepared_df['frameworks_left'].apply(lambda x: x[0] if x else None)\n",
        "    prepared_df['frameworks_right'] = prepared_df['frameworks_right'].apply(lambda x: x[0] if x else None)\n",
        "\n",
        "    return prepared_df\n",
        "\n",
        "ratings_prepared = prepare_ratings_data(ratings)\n",
        "\n",
        "# Now you can use ratings_prepared for your Elo calculations or other analyses\n",
        "\n",
        "# Example: Print the first few rows of the prepared data\n",
        "print(ratings_prepared.head())\n",
        "\n",
        "# Example: Get unique agents\n",
        "unique_agents = set(ratings_prepared['leftAgent'].unique()) | set(ratings_prepared['rightAgent'].unique())\n",
        "print(f\"Number of unique agents: {len(unique_agents)}\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 141,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "MzrS_kUOXyTe",
        "outputId": "8afcd08c-8d41-4baf-fc8e-774d45277a4f"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Tool Elo Ratings:\n",
            " riza-code-interpreter         1223.83\n",
            "riza-code-interpreter         1223.83\n",
            "dall-e                        1136.77\n",
            "dall-e                        1136.77\n",
            "yahoo-finance-news            1078.91\n",
            "                                ...  \n",
            "google-lens                    796.22\n",
            "google-lens                    796.22\n",
            "file-search                    782.44\n",
            "file-search                    782.44\n",
            "llamaindex-code-interpreter    493.64\n",
            "Length: 65, dtype: float64\n",
            "Model Elo Ratings:\n",
            " llama-3.1-70B-instruct       1064.08\n",
            "llama-3.1-70B-instruct       1064.08\n",
            "open-mixtral-8x7b            1051.06\n",
            "open-mixtral-8x7b            1051.06\n",
            "open-mixtral-8x22b           1041.03\n",
            "open-mixtral-8x22b           1041.03\n",
            "gpt-4-turbo-2024-04-09       1037.96\n",
            "gpt-4-turbo-2024-04-09       1037.96\n",
            "gpt-4o-2024-08-06            1036.37\n",
            "gpt-4o-2024-08-06            1036.37\n",
            "gpt-4o-2024-05-13            1032.93\n",
            "gpt-4o-2024-05-13            1032.93\n",
            "claude-3-opus-20240229       1026.32\n",
            "claude-3-opus-20240229       1026.32\n",
            "llama-3.1-405B-instruct      1025.89\n",
            "llama-3.1-405B-instruct      1025.89\n",
            "claude-3-5-sonnet-20240620   1022.99\n",
            "claude-3-5-sonnet-20240620   1022.99\n",
            "gpt-4o-mini-2024-07-18       1021.28\n",
            "gpt-4o-mini-2024-07-18       1021.28\n",
            "gpt-4-0613                   1019.33\n",
            "gpt-4-0613                   1019.33\n",
            "claude-3-haiku-20240307      1013.31\n",
            "claude-3-haiku-20240307      1013.31\n",
            "mistral-large-2407            998.16\n",
            "mistral-large-2407            998.16\n",
            "gemini-1.5-flash-001          997.05\n",
            "gemini-1.5-flash-001          997.05\n",
            "gemini-1.5-pro-001            984.84\n",
            "gemini-1.5-pro-001            984.84\n",
            "llama-3.1-8B-instruct         972.32\n",
            "llama-3.1-8B-instruct         972.32\n",
            "gemini-1.5-pro-002            740.74\n",
            "gemini-1.5-flash-002          634.00\n",
            "dtype: float64\n",
            "Framework Elo Ratings:\n",
            " llamaindex           1093.15\n",
            "llamaindex           1093.15\n",
            "langchain            1036.33\n",
            "langchain            1036.33\n",
            "anthropic tool use   1026.27\n",
            "anthropic tool use   1026.27\n",
            "crewai                967.11\n",
            "crewai                967.11\n",
            "openai assistants     963.17\n",
            "openai assistants     963.17\n",
            "dtype: float64\n"
          ]
        }
      ],
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "import math\n",
        "\n",
        "def compute_mle_elo_combined(ratings_df, SCALE=400, BASE=10, INIT_RATING=1000):\n",
        "    # Create index for all unique components (tools, models, frameworks)\n",
        "    all_tools = pd.Index(ratings_df['tools_left'].dropna().unique().tolist() + ratings_df['tools_right'].dropna().unique().tolist())\n",
        "    all_models = pd.Index(ratings_df['models_left'].dropna().unique().tolist() + ratings_df['models_right'].dropna().unique().tolist())\n",
        "    all_frameworks = pd.Index(ratings_df['frameworks_left'].dropna().unique().tolist() + ratings_df['frameworks_right'].dropna().unique().tolist())\n",
        "\n",
        "    all_components = pd.Index(all_tools.tolist() + all_models.tolist() + all_frameworks.tolist())\n",
        "\n",
        "    p = len(all_components)\n",
        "    X = np.zeros([ratings_df.shape[0] * 2, p])  # 2 rows per battle\n",
        "    Y = np.zeros(ratings_df.shape[0] * 2)\n",
        "    sample_weights = []\n",
        "    cur_row = 0\n",
        "\n",
        "    for i, row in ratings_df.iterrows():\n",
        "        # Get the indices of the subcomponents\n",
        "        left_tool_idx = all_components.get_loc(row['tools_left']) if pd.notna(row['tools_left']) else None\n",
        "        right_tool_idx = all_components.get_loc(row['tools_right']) if pd.notna(row['tools_right']) else None\n",
        "        left_model_idx = all_components.get_loc(row['models_left']) if pd.notna(row['models_left']) else None\n",
        "        right_model_idx = all_components.get_loc(row['models_right']) if pd.notna(row['models_right']) else None\n",
        "        left_framework_idx = all_components.get_loc(row['frameworks_left']) if pd.notna(row['frameworks_left']) else None\n",
        "        right_framework_idx = all_components.get_loc(row['frameworks_right']) if pd.notna(row['frameworks_right']) else None\n",
        "\n",
        "        # Check for tool category overlap\n",
        "        left_tool_categories = set(row['tool_categories_left'])\n",
        "        right_tool_categories = set(row['tool_categories_right'])\n",
        "        tool_category_overlap = len(left_tool_categories.intersection(right_tool_categories)) > 0\n",
        "\n",
        "        # Set the design matrix for the left and right subcomponents\n",
        "        if left_tool_idx is not None and right_tool_idx is not None and tool_category_overlap:\n",
        "            X[cur_row, left_tool_idx] = +math.log(BASE)\n",
        "            X[cur_row, right_tool_idx] = -math.log(BASE)\n",
        "        if left_model_idx is not None and right_model_idx is not None:\n",
        "            X[cur_row, left_model_idx] = +math.log(BASE)\n",
        "            X[cur_row, right_model_idx] = -math.log(BASE)\n",
        "        if left_framework_idx is not None and right_framework_idx is not None:\n",
        "            X[cur_row, left_framework_idx] = +math.log(BASE)\n",
        "            X[cur_row, right_framework_idx] = -math.log(BASE)\n",
        "\n",
        "        # Define the outcome for this row\n",
        "        Y[cur_row] = 1.0 if row['rating'] == 'A is better' else 0.0\n",
        "        sample_weights.append(1)\n",
        "        cur_row += 1\n",
        "\n",
        "        # Reverse for the next row\n",
        "        if left_tool_idx is not None and right_tool_idx is not None and tool_category_overlap:\n",
        "            X[cur_row, left_tool_idx] = -math.log(BASE)\n",
        "            X[cur_row, right_tool_idx] = +math.log(BASE)\n",
        "        if left_model_idx is not None and right_model_idx is not None:\n",
        "            X[cur_row, left_model_idx] = -math.log(BASE)\n",
        "            X[cur_row, right_model_idx] = +math.log(BASE)\n",
        "        if left_framework_idx is not None and right_framework_idx is not None:\n",
        "            X[cur_row, left_framework_idx] = -math.log(BASE)\n",
        "            X[cur_row, right_framework_idx] = +math.log(BASE)\n",
        "\n",
        "        # Define the outcome for the reversed row\n",
        "        Y[cur_row] = 0.0 if row['rating'] == 'A is better' else 1.0\n",
        "        sample_weights.append(1)\n",
        "        cur_row += 1\n",
        "\n",
        "    X = X[:cur_row]\n",
        "    Y = Y[:cur_row]\n",
        "\n",
        "    # Logistic Regression\n",
        "    lr = LogisticRegression(fit_intercept=False, tol=1e-6, penalty='l2', C=1.0, solver='liblinear')\n",
        "    lr.fit(X, Y, sample_weight=sample_weights)\n",
        "\n",
        "    # Scale the coefficients to get Elo ratings\n",
        "    elo_scores = SCALE * lr.coef_[0] + INIT_RATING\n",
        "\n",
        "    # Split back the scores\n",
        "    tool_elo = pd.Series(elo_scores[:len(all_tools)], index=all_tools)\n",
        "    model_elo = pd.Series(elo_scores[len(all_tools):len(all_tools) + len(all_models)], index=all_models)\n",
        "    framework_elo = pd.Series(elo_scores[len(all_tools) + len(all_models):], index=all_frameworks)\n",
        "\n",
        "    return tool_elo.sort_values(ascending=False), model_elo.sort_values(ascending=False), framework_elo.sort_values(ascending=False)\n",
        "\n",
        "# Prepare the ratings data for Elo calculation\n",
        "def prepare_ratings_data(ratings_df):\n",
        "    prepared_df = pd.DataFrame({\n",
        "        'leftAgent': ratings_df['Agent_A'].apply(lambda x: x['Agent name']),\n",
        "        'rightAgent': ratings_df['Agent_B'].apply(lambda x: x['Agent name']),\n",
        "        'tools_left': ratings_df['Agent_A_info'].apply(lambda x: x['tools'][0] if x['tools'] else None),\n",
        "        'tools_right': ratings_df['Agent_B_info'].apply(lambda x: x['tools'][0] if x['tools'] else None),\n",
        "        'models_left': ratings_df['Agent_A_info'].apply(lambda x: x['models'][0] if x['models'] else None),\n",
        "        'models_right': ratings_df['Agent_B_info'].apply(lambda x: x['models'][0] if x['models'] else None),\n",
        "        'frameworks_left': ratings_df['Agent_A_info'].apply(lambda x: x['frameworks'][0] if x['frameworks'] else None),\n",
        "        'frameworks_right': ratings_df['Agent_B_info'].apply(lambda x: x['frameworks'][0] if x['frameworks'] else None),\n",
        "        'tool_categories_left': ratings_df['Agent_A'].apply(lambda x: x['Tool Categories']),\n",
        "        'tool_categories_right': ratings_df['Agent_B'].apply(lambda x: x['Tool Categories']),\n",
        "        'rating': ratings_df['Rating']\n",
        "    })\n",
        "    return prepared_df\n",
        "\n",
        "# Prepare the data and calculate the Elo ratings\n",
        "ratings_prepared = prepare_ratings_data(ratings)\n",
        "tool_elo, model_elo, framework_elo = compute_mle_elo_combined(ratings_prepared)\n",
        "\n",
        "# Output the results\n",
        "print(\"Tool Elo Ratings:\\n\", tool_elo)\n",
        "print(\"Model Elo Ratings:\\n\", model_elo)\n",
        "print(\"Framework Elo Ratings:\\n\", framework_elo)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.6"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
